diff --git a/.github/workflows/fpm-deployment.yml b/.github/workflows/fpm-deployment.yml index 137fdd960..de19693f2 100644 --- a/.github/workflows/fpm-deployment.yml +++ b/.github/workflows/fpm-deployment.yml @@ -31,9 +31,9 @@ jobs: --slave /usr/bin/gcov gcov /usr/bin/gcov-${{ env.GCC_V }} - name: Install fpm latest release - uses: fortran-lang/setup-fpm@v3 + uses: fortran-lang/setup-fpm@v5 with: - fpm-version: 'v0.4.0' + fpm-version: 'v0.10.0' - name: Run fpm test ⚙ run: | diff --git a/ci/fpm.toml b/ci/fpm.toml index e85a54f69..6a0509007 100644 --- a/ci/fpm.toml +++ b/ci/fpm.toml @@ -8,3 +8,6 @@ copyright = "2019-2021 stdlib contributors" [dev-dependencies] test-drive.git = "https://github.com/fortran-lang/test-drive" test-drive.tag = "v0.4.0" + +[preprocess] +[preprocess.cpp] diff --git a/cmake/stdlib.cmake b/cmake/stdlib.cmake index 9a1fec059..bdaf87d1f 100644 --- a/cmake/stdlib.cmake +++ b/cmake/stdlib.cmake @@ -40,3 +40,10 @@ function (fypp_f90 fyppopts fyppfiles f90files) preprocess("${FYPP}" "${fyppopts}" "fypp" "f90" "${fyppfiles}" _f90files) set(${f90files} ${_f90files} PARENT_SCOPE) endfunction() + +# For fortran sources that contain C preprocessor flags: create ".F90" files +function (fypp_f90pp fyppopts fyppfiles F90files) + preprocess("${FYPP}" "${fyppopts}" "fypp" "F90" "${fyppfiles}" _F90files) + set(${F90files} ${_F90files} PARENT_SCOPE) +endfunction() + diff --git a/doc/specs/stdlib_linalg.md b/doc/specs/stdlib_linalg.md index 671cfee2f..912cd4828 100644 --- a/doc/specs/stdlib_linalg.md +++ b/doc/specs/stdlib_linalg.md @@ -6,6 +6,176 @@ title: linalg [TOC] +The `stdlib` linear algebra library provides high-level APIs for dealing with common linear algebra operations. + +## BLAS and LAPACK + +### Status + +Experimental + +### Description + +`BLAS` and `LAPACK` backends provide efficient low level implementations of many linear algebra algorithms, and are employed for non-trivial operators. +A Modern Fortran version of the [Reference-LAPACK 3.10.1](http://github.com/reference-LAPACK) implementation is provided as a backend. +Modern Fortran modules with full explicit typing features are provided after an +[automated conversion](https://github.com/perazz/fortran-lapack/blob/main/scripts/modularize_blas.py) +of the legacy codes: +- [stdlib_linalg_blas(module)], [stdlib_linalg_lapack(module)] provide kind-agnostic interfaces to all functions. +- Both libraries are available for 32- (`sp`), 64- (`dp`) and 128-bit (`qp`) `real` and `complex` numbers (the latter if available in the current build) +- Free format, lower-case style +- `implicit none(type, external)` applied to all procedures and modules +- `intent` added and all `pure` procedures where possible +- `stdlib` provides all procedures in two different flavors: (a) original BLAS/LAPACK names with a prefix `stdlib_?` (ex: `stdlib_dgemv`, `stdlib_sgemv`); (b) A generic, kind agnostic ``, i.e. `gemv`. +- F77-style `parameter`s removed, and all numeric constants have been generalized with KIND-dependent Fortran intrinsics. +- preprocessor-based OpenMP directives retained. +The single-source module structure hopefully allows for cross-procedural inlining which is otherwise impossible without link-time optimization. + +When available, highly optimized libraries that take advantage of specialized processor instructions should be preferred over the `stdlib` implementation. +Examples of such libraries are: OpenBLAS, MKL (TM), Accelerate, and ATLAS. In order to enable their usage, simply ensure that the following pre-processor macros are defined: + +- `STDLIB_EXTERNAL_BLAS` wraps all BLAS procedures (except for the 128-bit ones) to an external library +- `STDLIB_EXTERNAL_LAPACK` wraps all LAPACK procedures (except for the 128-bit ones) to an external library + +These can be enabled during the build process. For example, with CMake, one can enable these preprocessor directives using `add_compile_definitions(STDLIB_EXTERNAL_BLAS STDLIB_EXTERNAL_LAPACK)`. +The same is possible from the `fpm` branch, where the `cpp` preprocessor is enabled by default. For example, the macros can be added to the project's manifest: + +```toml +[dependencies] +stdlib="*" + +# Macros are only needed if using an external library +[preprocess] +[preprocess.cpp] +macros = ["STDLIB_EXTERNAL_BLAS", "STDLIB_EXTERNAL_LAPACK"] +``` + +or directly via compiler flags: + +`fpm build --flag "-DSTDLIB_EXTERNAL_BLAS -DSTDLIB_EXTERNAL_LAPACK -lblas -llapack"`. + +### Syntax + +All procedures in the `BLAS` and `LAPACK` backends follow the standard interfaces from the +[Reference LAPACK](https://www.netlib.org/lapack/). So, the online [Users Guide](https://www.netlib.org/lapack/explore-html/) +should be consulted for the full API and descriptions of procedure arguments and their usage. + +The `stdlib` implementation makes both kind-agnostic and specific procedure interfaces available via modules +[stdlib_linalg_blas(module)] and [stdlib_linalg_lapack(module)]. Because all procedures start with a letter +[that indicates the base datatype](https://www.netlib.org/lapack/lug/node24.html), the `stdlib` generic +interface drops the heading letter and contains all kind-dependent implementations. For example, the generic +interface to the `axpy` function looks like: + +```fortran +!> AXPY: constant times a vector plus a vector. +interface axpy + module procedure stdlib_saxpy + module procedure stdlib_daxpy + module procedure stdlib_qaxpy + module procedure stdlib_caxpy + module procedure stdlib_zaxpy + module procedure stdlib_waxpy +end interface axpy +``` + +The generic interface is the endpoint for using an external library. Whenever the latter is used, references +to the internal `module procedure`s are replaced with interfaces to the external library, +for example: + +```fortran +!> AXPY: constant times a vector plus a vector. +interface axpy + pure subroutine caxpy(n,ca,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: ca,cx(*) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(inout) :: cy(*) + end subroutine caxpy + ! [....] + module procedure stdlib_qaxpy +end interface axpy +``` + +Note that the 128-bit functions are only provided by `stdlib` and always point to the internal implementation. +Because 128-bit precision is identified as [stdlib_kinds(module):qp], initials for 128-bit procedures were +labelled as `q` (quadruple-precision reals) and `w` ("wide" or quadruple-precision complex numbers). +Extended precision ([stdlib_kinds(module):xdp]) calculations are currently not supported. + +### Example + +```fortran +{!example/linalg/example_blas_gemv.f90!} +``` + +```fortran +{!example/linalg/example_lapack_getrf.f90!} +``` + +### Licensing + +The Fortran Standard Library is distributed under the MIT License. `LAPACK` and its contained `BLAS` are a +freely-available software package. They are available from [netlib](https://www.netlib.org/lapack/) via anonymous +ftp and the World Wide Web. Thus, they can be included in commercial software packages (and have been). +The license used for the `BLAS` and `LAPACK` backends is the [modified BSD license](https://www.netlib.org/lapack/LICENSE.txt). + +The header of the `LICENSE.txt` file has as its licensing requirements: + + Copyright (c) 1992-2013 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. + Copyright (c) 2000-2013 The University of California Berkeley. All + rights reserved. + Copyright (c) 2006-2013 The University of Colorado Denver. All rights + reserved. + + $COPYRIGHT$ + + Additional copyrights may follow + + $HEADER$ + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + + - Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + The copyright holders provide no reassurances that the source code + provided does not infringe any patent, copyright, or any other + intellectual property rights of third parties. The copyright holders + disclaim any liability to any recipient for claims brought against + recipient by any third party for infringement of that parties + intellectual property rights. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +So the license for the `LICENSE.txt` code is compatible with the use of +modified versions of the code in the Fortran Standard Library under the MIT license. +Credit for the `BLAS`, `LAPACK` libraries should be given to the [LAPACK authors](https://www.netlib.org/lapack/contributor-list.html). +According to the original license, we also changed the name of the routines and commented the changes made +to the original. + ## `diag` - Create a diagonal array or extract the diagonal elements of an array ### Status diff --git a/example/linalg/example_blas_gemv.f90 b/example/linalg/example_blas_gemv.f90 new file mode 100644 index 000000000..e5d0e0799 --- /dev/null +++ b/example/linalg/example_blas_gemv.f90 @@ -0,0 +1,14 @@ +program example_gemv + use stdlib_linalg, only: eye + use stdlib_linalg_blas, only: sp,gemv + implicit none(type,external) + real(sp) :: A(2, 2), B(2) + B = [1.0,2.0] + A = eye(2) + + ! Use legacy BLAS interface + call gemv('No transpose',m=size(A,1),n=size(A,2),alpha=1.0,a=A,lda=size(A,1),x=B,incx=1,beta=0.0,y=B,incy=1) + + print *, B ! returns 1.0 2.0 + +end program example_gemv diff --git a/example/linalg/example_lapack_getrf.f90 b/example/linalg/example_lapack_getrf.f90 new file mode 100644 index 000000000..221d83a71 --- /dev/null +++ b/example/linalg/example_lapack_getrf.f90 @@ -0,0 +1,14 @@ +program example_getrf + use stdlib_linalg, only: eye + use stdlib_linalg_lapack, only: dp,ilp,getrf + implicit none(type,external) + real(dp) :: A(3, 3) + integer(ilp) :: ipiv(3),info + + A = eye(3) + + ! LAPACK matrix factorization interface (overwrite result) + call getrf(size(A,1),size(A,2),A,size(A,1),ipiv,info) + print *, info ! info==0: Success! + +end program example_getrf diff --git a/include/common.fypp b/include/common.fypp index 52ca02941..1683239e4 100644 --- a/include/common.fypp +++ b/include/common.fypp @@ -87,7 +87,6 @@ #! Whether Fortran 90 compatible code should be generated #:set VERSION90 = defined('VERSION90') - #! Ranks to be generated when templates are created #:if not defined('MAXRANK') #:if VERSION90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89890f4a0..6285ed0e5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,8 +64,29 @@ set(fppFiles stdlib_version.fypp ) +# Preprocessed files to contain preprocessor directives -> .F90 +set(cppFiles + stdlib_linalg_constants.fypp + stdlib_linalg_blas.fypp + stdlib_linalg_blas_aux.fypp + stdlib_linalg_blas_s.fypp + stdlib_linalg_blas_d.fypp + stdlib_linalg_blas_q.fypp + stdlib_linalg_blas_c.fypp + stdlib_linalg_blas_z.fypp + stdlib_linalg_blas_w.fypp + stdlib_linalg_lapack.fypp + stdlib_linalg_lapack_aux.fypp + stdlib_linalg_lapack_s.fypp + stdlib_linalg_lapack_d.fypp + stdlib_linalg_lapack_q.fypp + stdlib_linalg_lapack_c.fypp + stdlib_linalg_lapack_z.fypp + stdlib_linalg_lapack_w.fypp +) fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) +fypp_f90pp("${fyppFlags}" "${cppFiles}" outPreprocFiles) set(SRC stdlib_ansi.f90 @@ -85,6 +106,7 @@ set(SRC stdlib_quadrature_gauss.f90 stdlib_stringlist_type.f90 ${outFiles} + ${outPreprocFiles} ) add_library(${PROJECT_NAME} ${SRC}) diff --git a/src/stdlib_linalg_blas.fypp b/src/stdlib_linalg_blas.fypp new file mode 100644 index 000000000..8c5bdb60e --- /dev/null +++ b/src/stdlib_linalg_blas.fypp @@ -0,0 +1,2242 @@ +#:include "common.fypp" +module stdlib_linalg_blas + use stdlib_linalg_constants + use stdlib_linalg_blas_aux + use stdlib_linalg_blas_s + use stdlib_linalg_blas_d +#:if WITH_QP + use stdlib_linalg_blas_q +#:endif + use stdlib_linalg_blas_c + use stdlib_linalg_blas_z +#:if WITH_QP + use stdlib_linalg_blas_w +#:endif + implicit none(type,external) + public + + !> AXPY: constant times a vector plus a vector. + interface axpy +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine caxpy(n,ca,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: ca,cx(*) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(inout) :: cy(*) + end subroutine caxpy +#else + module procedure stdlib_caxpy +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine daxpy(n,da,dx,incx,dy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: da,dx(*) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(inout) :: dy(*) + end subroutine daxpy +#else + module procedure stdlib_daxpy +#endif +#:if WITH_QP + module procedure stdlib_qaxpy +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine saxpy(n,sa,sx,incx,sy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: sa,sx(*) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(inout) :: sy(*) + end subroutine saxpy +#else + module procedure stdlib_saxpy +#endif +#:if WITH_QP + module procedure stdlib_waxpy +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zaxpy(n,za,zx,incx,zy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: za,zx(*) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(inout) :: zy(*) + end subroutine zaxpy +#else + module procedure stdlib_zaxpy +#endif + end interface axpy + + + + !> COPY: copies a vector x to a vector y. + interface copy +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ccopy(n,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(in) :: cx(*) + complex(sp), intent(out) :: cy(*) + end subroutine ccopy +#else + module procedure stdlib_ccopy +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dcopy(n,dx,incx,dy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(in) :: dx(*) + real(dp), intent(out) :: dy(*) + end subroutine dcopy +#else + module procedure stdlib_dcopy +#endif +#:if WITH_QP + module procedure stdlib_qcopy +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine scopy(n,sx,incx,sy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: sx(*) + real(sp), intent(out) :: sy(*) + end subroutine scopy +#else + module procedure stdlib_scopy +#endif +#:if WITH_QP + module procedure stdlib_wcopy +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zcopy(n,zx,incx,zy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(in) :: zx(*) + complex(dp), intent(out) :: zy(*) + end subroutine zcopy +#else + module procedure stdlib_zcopy +#endif + end interface copy + + + + !> DOT: forms the dot product of two vectors. + !> uses unrolled loops for increments equal to one. + interface dot +#ifdef STDLIB_EXTERNAL_BLAS + pure real(dp) function ddot(n,dx,incx,dy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(in) :: dx(*),dy(*) + end function ddot +#else + module procedure stdlib_ddot +#endif +#:if WITH_QP + module procedure stdlib_qdot +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure real(sp) function sdot(n,sx,incx,sy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: sx(*),sy(*) + end function sdot +#else + module procedure stdlib_sdot +#endif + end interface dot + + + + !> DOTC: forms the dot product of two complex vectors + !> DOTC = X^H * Y + interface dotc +#ifdef STDLIB_EXTERNAL_BLAS + pure complex(sp) function cdotc(n,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(in) :: cx(*),cy(*) + end function cdotc +#else + module procedure stdlib_cdotc +#endif +#:if WITH_QP + module procedure stdlib_wdotc +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure complex(dp) function zdotc(n,zx,incx,zy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(in) :: zx(*),zy(*) + end function zdotc +#else + module procedure stdlib_zdotc +#endif + end interface dotc + + + + !> DOTU: forms the dot product of two complex vectors + !> DOTU = X^T * Y + interface dotu +#ifdef STDLIB_EXTERNAL_BLAS + pure complex(sp) function cdotu(n,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(in) :: cx(*),cy(*) + end function cdotu +#else + module procedure stdlib_cdotu +#endif +#:if WITH_QP + module procedure stdlib_wdotu +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure complex(dp) function zdotu(n,zx,incx,zy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(in) :: zx(*),zy(*) + end function zdotu +#else + module procedure stdlib_zdotu +#endif + end interface dotu + + + + !> GBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + interface gbmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,kl,ku,lda,m,n + character, intent(in) :: trans + complex(sp), intent(inout) :: y(*) + end subroutine cgbmv +#else + module procedure stdlib_cgbmv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,kl,ku,lda,m,n + character, intent(in) :: trans + real(dp), intent(inout) :: y(*) + end subroutine dgbmv +#else + module procedure stdlib_dgbmv +#endif +#:if WITH_QP + module procedure stdlib_qgbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,kl,ku,lda,m,n + character, intent(in) :: trans + real(sp), intent(inout) :: y(*) + end subroutine sgbmv +#else + module procedure stdlib_sgbmv +#endif +#:if WITH_QP + module procedure stdlib_wgbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,kl,ku,lda,m,n + character, intent(in) :: trans + complex(dp), intent(inout) :: y(*) + end subroutine zgbmv +#else + module procedure stdlib_zgbmv +#endif + end interface gbmv + + + + !> GEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T or op( X ) = X**H, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + interface gemm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,m,n + character, intent(in) :: transa,transb + complex(sp), intent(inout) :: c(ldc,*) + end subroutine cgemm +#else + module procedure stdlib_cgemm +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,m,n + character, intent(in) :: transa,transb + real(dp), intent(inout) :: c(ldc,*) + end subroutine dgemm +#else + module procedure stdlib_dgemm +#endif +#:if WITH_QP + module procedure stdlib_qgemm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,m,n + character, intent(in) :: transa,transb + real(sp), intent(inout) :: c(ldc,*) + end subroutine sgemm +#else + module procedure stdlib_sgemm +#endif +#:if WITH_QP + module procedure stdlib_wgemm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,m,n + character, intent(in) :: transa,transb + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zgemm +#else + module procedure stdlib_zgemm +#endif + end interface gemm + + + + !> GEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + interface gemv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + character, intent(in) :: trans + complex(sp), intent(inout) :: y(*) + end subroutine cgemv +#else + module procedure stdlib_cgemv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + character, intent(in) :: trans + real(dp), intent(inout) :: y(*) + end subroutine dgemv +#else + module procedure stdlib_dgemv +#endif +#:if WITH_QP + module procedure stdlib_qgemv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + character, intent(in) :: trans + real(sp), intent(inout) :: y(*) + end subroutine sgemv +#else + module procedure stdlib_sgemv +#endif +#:if WITH_QP + module procedure stdlib_wgemv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + character, intent(in) :: trans + complex(dp), intent(inout) :: y(*) + end subroutine zgemv +#else + module procedure stdlib_zgemv +#endif + end interface gemv + + + + !> GER: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + interface ger +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dger(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dger +#else + module procedure stdlib_dger +#endif +#:if WITH_QP + module procedure stdlib_qger +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sger(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + real(sp), intent(inout) :: a(lda,*) + end subroutine sger +#else + module procedure stdlib_sger +#endif + end interface ger + + + + !> GERC: performs the rank 1 operation + !> A := alpha*x*y**H + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + interface gerc +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cgerc(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cgerc +#else + module procedure stdlib_cgerc +#endif +#:if WITH_QP + module procedure stdlib_wgerc +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zgerc(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zgerc +#else + module procedure stdlib_zgerc +#endif + end interface gerc + + + + !> GERU: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + interface geru +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cgeru(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cgeru +#else + module procedure stdlib_cgeru +#endif +#:if WITH_QP + module procedure stdlib_wgeru +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zgeru(m,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zgeru +#else + module procedure stdlib_zgeru +#endif + end interface geru + + + + !> HBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian band matrix, with k super-diagonals. + interface hbmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,k,lda,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: y(*) + end subroutine chbmv +#else + module procedure stdlib_chbmv +#endif +#:if WITH_QP + module procedure stdlib_whbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,k,lda,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: y(*) + end subroutine zhbmv +#else + module procedure stdlib_zhbmv +#endif + end interface hbmv + + + + !> HEMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is an hermitian matrix and B and + !> C are m by n matrices. + interface hemm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chemm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + complex(sp), intent(inout) :: c(ldc,*) + end subroutine chemm +#else + module procedure stdlib_chemm +#endif +#:if WITH_QP + module procedure stdlib_whemm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhemm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zhemm +#else + module procedure stdlib_zhemm +#endif + end interface hemm + + + + !> HEMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix. + interface hemv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chemv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: y(*) + end subroutine chemv +#else + module procedure stdlib_chemv +#endif +#:if WITH_QP + module procedure stdlib_whemv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhemv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: y(*) + end subroutine zhemv +#else + module procedure stdlib_zhemv +#endif + end interface hemv + + + + !> HER: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix. + interface her +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cher(uplo,n,alpha,x,incx,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*) + end subroutine cher +#else + module procedure stdlib_cher +#endif +#:if WITH_QP + module procedure stdlib_wher +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zher(uplo,n,alpha,x,incx,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*) + end subroutine zher +#else + module procedure stdlib_zher +#endif + end interface her + + + + !> HER2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n hermitian matrix. + interface her2 +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cher2(uplo,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: a(lda,*) + end subroutine cher2 +#else + module procedure stdlib_cher2 +#endif +#:if WITH_QP + module procedure stdlib_wher2 +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zher2(uplo,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: a(lda,*) + end subroutine zher2 +#else + module procedure stdlib_zher2 +#endif + end interface her2 + + + + !> HER2K: performs one of the hermitian rank 2k operations + !> C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + !> or + !> C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + !> where alpha and beta are scalars with beta real, C is an n by n + !> hermitian matrix and A and B are n by k matrices in the first case + !> and k by n matrices in the second case. + interface her2k +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cher2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,a(lda,*),b(ldb,*) + real(sp), intent(in) :: beta + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + complex(sp), intent(inout) :: c(ldc,*) + end subroutine cher2k +#else + module procedure stdlib_cher2k +#endif +#:if WITH_QP + module procedure stdlib_wher2k +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zher2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,a(lda,*),b(ldb,*) + real(dp), intent(in) :: beta + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zher2k +#else + module procedure stdlib_zher2k +#endif + end interface her2k + + + + !> HERK: performs one of the hermitian rank k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n by n hermitian + !> matrix and A is an n by k matrix in the first case and a k by n + !> matrix in the second case. + interface herk +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cherk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: c(ldc,*) + end subroutine cherk +#else + module procedure stdlib_cherk +#endif +#:if WITH_QP + module procedure stdlib_wherk +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zherk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zherk +#else + module procedure stdlib_zherk +#endif + end interface herk + + + + !> HPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix, supplied in packed form. + interface hpmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chpmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,ap(*),x(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: y(*) + end subroutine chpmv +#else + module procedure stdlib_chpmv +#endif +#:if WITH_QP + module procedure stdlib_whpmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhpmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,ap(*),x(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: y(*) + end subroutine zhpmv +#else + module procedure stdlib_zhpmv +#endif + end interface hpmv + + + + !> HPR: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix, supplied in packed form. + interface hpr +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chpr(uplo,n,alpha,x,incx,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: x(*) + end subroutine chpr +#else + module procedure stdlib_chpr +#endif +#:if WITH_QP + module procedure stdlib_whpr +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhpr(uplo,n,alpha,x,incx,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: x(*) + end subroutine zhpr +#else + module procedure stdlib_zhpr +#endif + end interface hpr + + + + !> HPR2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n hermitian matrix, supplied in packed form. + interface hpr2 +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine chpr2(uplo,n,alpha,x,incx,y,incy,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: ap(*) + end subroutine chpr2 +#else + module procedure stdlib_chpr2 +#endif +#:if WITH_QP + module procedure stdlib_whpr2 +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zhpr2(uplo,n,alpha,x,incx,y,incy,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: ap(*) + end subroutine zhpr2 +#else + module procedure stdlib_zhpr2 +#endif + end interface hpr2 + + + + !> ! + !> + !> NRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> NRM2 := sqrt( x'*x ) + interface nrm2 +#ifdef STDLIB_EXTERNAL_BLAS + pure real(dp) function dnrm2( n, x, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(in) :: x(*) + end function dnrm2 +#else + module procedure stdlib_dnrm2 +#endif +#:if WITH_QP + module procedure stdlib_qnrm2 +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure real(sp) function snrm2( n, x, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(in) :: x(*) + end function snrm2 +#else + module procedure stdlib_snrm2 +#endif + end interface nrm2 + + + + !> ROT: applies a plane rotation. + interface rot +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine drot(n,dx,incx,dy,incy,c,s) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: c,s + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(inout) :: dx(*),dy(*) + end subroutine drot +#else + module procedure stdlib_drot +#endif +#:if WITH_QP + module procedure stdlib_qrot +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine srot(n,sx,incx,sy,incy,c,s) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: c,s + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(inout) :: sx(*),sy(*) + end subroutine srot +#else + module procedure stdlib_srot +#endif + end interface rot + + + + !> ! + !> + !> The computation uses the formulas + !> |x| = sqrt( Re(x)**2 + Im(x)**2 ) + !> sgn(x) = x / |x| if x /= 0 + !> = 1 if x = 0 + !> c = |a| / sqrt(|a|**2 + |b|**2) + !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) + !> When a and b are real and r /= 0, the formulas simplify to + !> r = sgn(a)*sqrt(|a|**2 + |b|**2) + !> c = a / r + !> s = b / r + !> the same as in SROTG when |a| > |b|. When |b| >= |a|, the + !> sign of c and s will be different from those computed by SROTG + !> if the signs of a and b are not the same. + interface rotg +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine crotg( a, b, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(out) :: c + complex(sp), intent(inout) :: a + complex(sp), intent(in) :: b + complex(sp), intent(out) :: s + end subroutine crotg +#else + module procedure stdlib_crotg +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine drotg( a, b, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(inout) :: a,b + real(dp), intent(out) :: c,s + end subroutine drotg +#else + module procedure stdlib_drotg +#endif +#:if WITH_QP + module procedure stdlib_qrotg +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine srotg( a, b, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(inout) :: a,b + real(sp), intent(out) :: c,s + end subroutine srotg +#else + module procedure stdlib_srotg +#endif +#:if WITH_QP + module procedure stdlib_wrotg +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zrotg( a, b, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(out) :: c + complex(dp), intent(inout) :: a + complex(dp), intent(in) :: b + complex(dp), intent(out) :: s + end subroutine zrotg +#else + module procedure stdlib_zrotg +#endif + end interface rotg + + + + !> APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX + !> (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN + !> (DY**T) + !> DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX >= 0, ELSE + !> LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._dp DFLAG=0._dp DFLAG=1._dp DFLAG=-2.D0 + !> (DH11 DH12) (1._dp DH12) (DH11 1._dp) (1._dp 0._dp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._dp), (-1._dp DH22), (0._dp 1._dp). + !> SEE ROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. + interface rotm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine drotm(n,dx,incx,dy,incy,dparam) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(in) :: dparam(5) + real(dp), intent(inout) :: dx(*),dy(*) + end subroutine drotm +#else + module procedure stdlib_drotm +#endif +#:if WITH_QP + module procedure stdlib_qrotm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine srotm(n,sx,incx,sy,incy,sparam) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: sparam(5) + real(sp), intent(inout) :: sx(*),sy(*) + end subroutine srotm +#else + module procedure stdlib_srotm +#endif + end interface rotm + + + + !> CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS + !> THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(DD1)*DX1,SQRT(DD2) DY2)**T. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._dp DFLAG=0._dp DFLAG=1._dp DFLAG=-2.D0 + !> (DH11 DH12) (1._dp DH12) (DH11 1._dp) (1._dp 0._dp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._dp), (-1._dp DH22), (0._dp 1._dp). + !> LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 + !> RESPECTIVELY. (VALUES OF 1._dp, -1._dp, OR 0._dp IMPLIED BY THE + !> VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) + !> THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE + !> INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE + !> OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. + interface rotmg +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine drotmg(dd1,dd2,dx1,dy1,dparam) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(inout) :: dd1,dd2,dx1 + real(dp), intent(in) :: dy1 + real(dp), intent(out) :: dparam(5) + end subroutine drotmg +#else + module procedure stdlib_drotmg +#endif +#:if WITH_QP + module procedure stdlib_qrotmg +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine srotmg(sd1,sd2,sx1,sy1,sparam) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(inout) :: sd1,sd2,sx1 + real(sp), intent(in) :: sy1 + real(sp), intent(out) :: sparam(5) + end subroutine srotmg +#else + module procedure stdlib_srotmg +#endif + end interface rotmg + + + + !> SBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric band matrix, with k super-diagonals. + interface sbmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,k,lda,n + character, intent(in) :: uplo + real(dp), intent(inout) :: y(*) + end subroutine dsbmv +#else + module procedure stdlib_dsbmv +#endif +#:if WITH_QP + module procedure stdlib_qsbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,k,lda,n + character, intent(in) :: uplo + real(sp), intent(inout) :: y(*) + end subroutine ssbmv +#else + module procedure stdlib_ssbmv +#endif + end interface sbmv + + + + !> SCAL: scales a vector by a constant. + interface scal +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cscal(n,ca,cx,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: ca + integer(ilp), intent(in) :: incx,n + complex(sp), intent(inout) :: cx(*) + end subroutine cscal +#else + module procedure stdlib_cscal +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dscal(n,da,dx,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: da + integer(ilp), intent(in) :: incx,n + real(dp), intent(inout) :: dx(*) + end subroutine dscal +#else + module procedure stdlib_dscal +#endif +#:if WITH_QP + module procedure stdlib_qscal +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sscal(n,sa,sx,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: sa + integer(ilp), intent(in) :: incx,n + real(sp), intent(inout) :: sx(*) + end subroutine sscal +#else + module procedure stdlib_sscal +#endif +#:if WITH_QP + module procedure stdlib_wscal +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zscal(n,za,zx,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: za + integer(ilp), intent(in) :: incx,n + complex(dp), intent(inout) :: zx(*) + end subroutine zscal +#else + module procedure stdlib_zscal +#endif + end interface scal + + + + !> Compute the inner product of two vectors with extended + !> precision accumulation and result. + !> Returns D.P. dot product accumulated in D.P., for S.P. SX and SY + !> SDOT: = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), + !> where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, and LY is + !> defined in a similar way using INCY. + interface sdot +#ifdef STDLIB_EXTERNAL_BLAS + pure real(dp) function dsdot(n,sx,incx,sy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: sx(*),sy(*) + end function dsdot +#else + module procedure stdlib_dsdot +#endif +#:if WITH_QP + module procedure stdlib_qsdot +#:endif + end interface sdot + + + + !> SPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + interface spmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dspmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,ap(*),x(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + real(dp), intent(inout) :: y(*) + end subroutine dspmv +#else + module procedure stdlib_dspmv +#endif +#:if WITH_QP + module procedure stdlib_qspmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sspmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,ap(*),x(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + real(sp), intent(inout) :: y(*) + end subroutine sspmv +#else + module procedure stdlib_sspmv +#endif + end interface spmv + + + + !> SPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + interface spr +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dspr(uplo,n,alpha,x,incx,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,x(*) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: uplo + real(dp), intent(inout) :: ap(*) + end subroutine dspr +#else + module procedure stdlib_dspr +#endif +#:if WITH_QP + module procedure stdlib_qspr +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sspr(uplo,n,alpha,x,incx,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,x(*) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: uplo + real(sp), intent(inout) :: ap(*) + end subroutine sspr +#else + module procedure stdlib_sspr +#endif + end interface spr + + + + !> SPR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n symmetric matrix, supplied in packed form. + interface spr2 +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dspr2(uplo,n,alpha,x,incx,y,incy,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + real(dp), intent(inout) :: ap(*) + end subroutine dspr2 +#else + module procedure stdlib_dspr2 +#endif +#:if WITH_QP + module procedure stdlib_qspr2 +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sspr2(uplo,n,alpha,x,incx,y,incy,ap) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,n + character, intent(in) :: uplo + real(sp), intent(inout) :: ap(*) + end subroutine sspr2 +#else + module procedure stdlib_sspr2 +#endif + end interface spr2 + + + + !> SROT: applies a plane rotation, where the cos and sin (c and s) are real + !> and the vectors cx and cy are complex. + !> jack dongarra, linpack, 3/11/78. + interface srot +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine csrot( n, cx, incx, cy, incy, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: c,s + complex(sp), intent(inout) :: cx(*),cy(*) + end subroutine csrot +#else + module procedure stdlib_csrot +#endif + end interface srot + + + + !> SSCAL: scales a complex vector by a real constant. + interface sscal +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine csscal(n,sa,cx,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: sa + integer(ilp), intent(in) :: incx,n + complex(sp), intent(inout) :: cx(*) + end subroutine csscal +#else + module procedure stdlib_csscal +#endif + end interface sscal + + + + !> SWAP: interchanges two vectors. + interface swap +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine cswap(n,cx,incx,cy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(inout) :: cx(*),cy(*) + end subroutine cswap +#else + module procedure stdlib_cswap +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dswap(n,dx,incx,dy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(inout) :: dx(*),dy(*) + end subroutine dswap +#else + module procedure stdlib_dswap +#endif +#:if WITH_QP + module procedure stdlib_qswap +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine sswap(n,sx,incx,sy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(inout) :: sx(*),sy(*) + end subroutine sswap +#else + module procedure stdlib_sswap +#endif +#:if WITH_QP + module procedure stdlib_wswap +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zswap(n,zx,incx,zy,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(inout) :: zx(*),zy(*) + end subroutine zswap +#else + module procedure stdlib_zswap +#endif + end interface swap + + + + !> SYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + interface symm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine csymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + complex(sp), intent(inout) :: c(ldc,*) + end subroutine csymm +#else + module procedure stdlib_csymm +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + real(dp), intent(inout) :: c(ldc,*) + end subroutine dsymm +#else + module procedure stdlib_dsymm +#endif +#:if WITH_QP + module procedure stdlib_qsymm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + real(sp), intent(inout) :: c(ldc,*) + end subroutine ssymm +#else + module procedure stdlib_ssymm +#endif +#:if WITH_QP + module procedure stdlib_wsymm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + character, intent(in) :: side,uplo + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zsymm +#else + module procedure stdlib_zsymm +#endif + end interface symm + + + + !> SYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + interface symv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsymv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + real(dp), intent(inout) :: y(*) + end subroutine dsymv +#else + module procedure stdlib_dsymv +#endif +#:if WITH_QP + module procedure stdlib_qsymv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssymv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + real(sp), intent(inout) :: y(*) + end subroutine ssymv +#else + module procedure stdlib_ssymv +#endif + end interface symv + + + + !> SYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + interface syr +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsyr(uplo,n,alpha,x,incx,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,x(*) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: uplo + real(dp), intent(inout) :: a(lda,*) + end subroutine dsyr +#else + module procedure stdlib_dsyr +#endif +#:if WITH_QP + module procedure stdlib_qsyr +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssyr(uplo,n,alpha,x,incx,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,x(*) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: uplo + real(sp), intent(inout) :: a(lda,*) + end subroutine ssyr +#else + module procedure stdlib_ssyr +#endif + end interface syr + + + + !> SYR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n symmetric matrix. + interface syr2 +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsyr2(uplo,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + real(dp), intent(inout) :: a(lda,*) + end subroutine dsyr2 +#else + module procedure stdlib_dsyr2 +#endif +#:if WITH_QP + module procedure stdlib_qsyr2 +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssyr2(uplo,n,alpha,x,incx,y,incy,a,lda) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,x(*),y(*) + integer(ilp), intent(in) :: incx,incy,lda,n + character, intent(in) :: uplo + real(sp), intent(inout) :: a(lda,*) + end subroutine ssyr2 +#else + module procedure stdlib_ssyr2 +#endif + end interface syr2 + + + + !> SYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + interface syr2k +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine csyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + complex(sp), intent(inout) :: c(ldc,*) + end subroutine csyr2k +#else + module procedure stdlib_csyr2k +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + real(dp), intent(inout) :: c(ldc,*) + end subroutine dsyr2k +#else + module procedure stdlib_dsyr2k +#endif +#:if WITH_QP + module procedure stdlib_qsyr2k +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + real(sp), intent(inout) :: c(ldc,*) + end subroutine ssyr2k +#else + module procedure stdlib_ssyr2k +#endif +#:if WITH_QP + module procedure stdlib_wsyr2k +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*),b(ldb,*) + integer(ilp), intent(in) :: k,lda,ldb,ldc,n + character, intent(in) :: trans,uplo + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zsyr2k +#else + module procedure stdlib_zsyr2k +#endif + end interface syr2k + + + + !> SYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + interface syrk +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine csyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + complex(sp), intent(inout) :: c(ldc,*) + end subroutine csyrk +#else + module procedure stdlib_csyrk +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + real(dp), intent(inout) :: c(ldc,*) + end subroutine dsyrk +#else + module procedure stdlib_dsyrk +#endif +#:if WITH_QP + module procedure stdlib_qsyrk +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ssyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + real(sp), intent(inout) :: c(ldc,*) + end subroutine ssyrk +#else + module procedure stdlib_ssyrk +#endif +#:if WITH_QP + module procedure stdlib_wsyrk +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine zsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,ldc,n + character, intent(in) :: trans,uplo + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zsyrk +#else + module procedure stdlib_zsyrk +#endif + end interface syrk + + + + !> TBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + interface tbmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctbmv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + end subroutine ctbmv +#else + module procedure stdlib_ctbmv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtbmv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + end subroutine dtbmv +#else + module procedure stdlib_dtbmv +#endif +#:if WITH_QP + module procedure stdlib_qtbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine stbmv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + end subroutine stbmv +#else + module procedure stdlib_stbmv +#endif +#:if WITH_QP + module procedure stdlib_wtbmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztbmv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + end subroutine ztbmv +#else + module procedure stdlib_ztbmv +#endif + end interface tbmv + + + + !> TBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + interface tbsv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctbsv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + end subroutine ctbsv +#else + module procedure stdlib_ctbsv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtbsv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + end subroutine dtbsv +#else + module procedure stdlib_dtbsv +#endif +#:if WITH_QP + module procedure stdlib_qtbsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine stbsv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + end subroutine stbsv +#else + module procedure stdlib_stbsv +#endif +#:if WITH_QP + module procedure stdlib_wtbsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztbsv(uplo,trans,diag,n,k,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k,lda,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + end subroutine ztbsv +#else + module procedure stdlib_ztbsv +#endif + end interface tbsv + + + + !> TPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + interface tpmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctpmv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + end subroutine ctpmv +#else + module procedure stdlib_ctpmv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtpmv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: x(*) + end subroutine dtpmv +#else + module procedure stdlib_dtpmv +#endif +#:if WITH_QP + module procedure stdlib_qtpmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine stpmv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: x(*) + end subroutine stpmv +#else + module procedure stdlib_stpmv +#endif +#:if WITH_QP + module procedure stdlib_wtpmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztpmv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + end subroutine ztpmv +#else + module procedure stdlib_ztpmv +#endif + end interface tpmv + + + + !> TPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + interface tpsv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctpsv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + end subroutine ctpsv +#else + module procedure stdlib_ctpsv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtpsv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: x(*) + end subroutine dtpsv +#else + module procedure stdlib_dtpsv +#endif +#:if WITH_QP + module procedure stdlib_qtpsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine stpsv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: x(*) + end subroutine stpsv +#else + module procedure stdlib_stpsv +#endif +#:if WITH_QP + module procedure stdlib_wtpsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztpsv(uplo,trans,diag,n,ap,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + end subroutine ztpsv +#else + module procedure stdlib_ztpsv +#endif + end interface tpsv + + + + !> TRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ) + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + interface trmm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + complex(sp), intent(inout) :: b(ldb,*) + end subroutine ctrmm +#else + module procedure stdlib_ctrmm +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + real(dp), intent(inout) :: b(ldb,*) + end subroutine dtrmm +#else + module procedure stdlib_dtrmm +#endif +#:if WITH_QP + module procedure stdlib_qtrmm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine strmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + real(sp), intent(inout) :: b(ldb,*) + end subroutine strmm +#else + module procedure stdlib_strmm +#endif +#:if WITH_QP + module procedure stdlib_wtrmm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + complex(dp), intent(inout) :: b(ldb,*) + end subroutine ztrmm +#else + module procedure stdlib_ztrmm +#endif + end interface trmm + + + + !> TRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + interface trmv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctrmv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + end subroutine ctrmv +#else + module procedure stdlib_ctrmv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtrmv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + end subroutine dtrmv +#else + module procedure stdlib_dtrmv +#endif +#:if WITH_QP + module procedure stdlib_qtrmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine strmv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + end subroutine strmv +#else + module procedure stdlib_strmv +#endif +#:if WITH_QP + module procedure stdlib_wtrmv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztrmv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + end subroutine ztrmv +#else + module procedure stdlib_ztrmv +#endif + end interface trmv + + + + !> TRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + !> The matrix X is overwritten on B. + interface trsm +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + complex(sp), intent(inout) :: b(ldb,*) + end subroutine ctrsm +#else + module procedure stdlib_ctrsm +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + real(dp), intent(inout) :: b(ldb,*) + end subroutine dtrsm +#else + module procedure stdlib_dtrsm +#endif +#:if WITH_QP + module procedure stdlib_qtrsm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine strsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + real(sp), intent(inout) :: b(ldb,*) + end subroutine strsm +#else + module procedure stdlib_strsm +#endif +#:if WITH_QP + module procedure stdlib_wtrsm +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: alpha,a(lda,*) + integer(ilp), intent(in) :: lda,ldb,m,n + character, intent(in) :: diag,side,transa,uplo + complex(dp), intent(inout) :: b(ldb,*) + end subroutine ztrsm +#else + module procedure stdlib_ztrsm +#endif + end interface trsm + + + + !> TRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + interface trsv +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ctrsv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + end subroutine ctrsv +#else + module procedure stdlib_ctrsv +#endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine dtrsv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + end subroutine dtrsv +#else + module procedure stdlib_dtrsv +#endif +#:if WITH_QP + module procedure stdlib_qtrsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine strsv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + end subroutine strsv +#else + module procedure stdlib_strsv +#endif +#:if WITH_QP + module procedure stdlib_wtrsv +#:endif +#ifdef STDLIB_EXTERNAL_BLAS + pure subroutine ztrsv(uplo,trans,diag,n,a,lda,x,incx) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,lda,n + character, intent(in) :: diag,trans,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + end subroutine ztrsv +#else + module procedure stdlib_ztrsv +#endif + end interface trsv + + + + + +end module stdlib_linalg_blas diff --git a/src/stdlib_linalg_blas_aux.fypp b/src/stdlib_linalg_blas_aux.fypp new file mode 100644 index 000000000..20e024487 --- /dev/null +++ b/src/stdlib_linalg_blas_aux.fypp @@ -0,0 +1,460 @@ +#:include "common.fypp" +module stdlib_linalg_blas_aux + use stdlib_linalg_constants + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_dcabs1 + public :: stdlib_icamax + public :: stdlib_idamax + public :: stdlib_isamax + public :: stdlib_izamax + public :: stdlib_lsame + public :: stdlib_scabs1 + public :: stdlib_xerbla + public :: stdlib_xerbla_array +#:if WITH_QP + public :: stdlib_qcabs1 +#:endif +#:if WITH_QP + public :: stdlib_iqamax +#:endif +#:if WITH_QP + public :: stdlib_iwamax +#:endif + + + contains + + !> DCABS1: computes |Re(.)| + |Im(.)| of a double complex number + + pure real(dp) function stdlib_dcabs1(z) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: z + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: abs,real,aimag + stdlib_dcabs1 = abs(real(z,KIND=dp)) + abs(aimag(z)) + return + end function stdlib_dcabs1 + + !> ISAMAX: finds the index of the first element having maximum absolute value. + + pure integer(ilp) function stdlib_isamax(n,sx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(sp), intent(in) :: sx(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: smax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + stdlib_isamax = 0 + if (n<1 .or. incx<=0) return + stdlib_isamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + smax = abs(sx(1)) + do i = 2,n + if (abs(sx(i))>smax) then + stdlib_isamax = i + smax = abs(sx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + smax = abs(sx(1)) + ix = ix + incx + do i = 2,n + if (abs(sx(ix))>smax) then + stdlib_isamax = i + smax = abs(sx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_isamax + + !> IZAMAX: finds the index of the first element having maximum |Re(.)| + |Im(.)| + + pure integer(ilp) function stdlib_izamax(n,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dmax + integer(ilp) :: i, ix + stdlib_izamax = 0 + if (n<1 .or. incx<=0) return + stdlib_izamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = stdlib_dcabs1(zx(1)) + do i = 2,n + if (stdlib_dcabs1(zx(i))>dmax) then + stdlib_izamax = i + dmax = stdlib_dcabs1(zx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = stdlib_dcabs1(zx(1)) + ix = ix + incx + do i = 2,n + if (stdlib_dcabs1(zx(ix))>dmax) then + stdlib_izamax = i + dmax = stdlib_dcabs1(zx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_izamax + + !> LSAME: returns .TRUE. if CA is the same letter as CB regardless of + !> case. + + pure logical(lk) function stdlib_lsame(ca,cb) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ca, cb + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: ichar + ! Local Scalars + integer(ilp) :: inta, intb, zcode + ! test if the characters are equal + stdlib_lsame = ca == cb + if (stdlib_lsame) return + ! now test for equivalence if both characters are alphabetic. + zcode = ichar('Z') + ! use 'z' rather than 'a' so that ascii can be detected on prime + ! machines, on which ichar returns a value with bit 8 set. + ! ichar('a') on prime machines returns 193 which is the same as + ! ichar('a') on an ebcdic machine. + inta = ichar(ca) + intb = ichar(cb) + if (zcode==90 .or. zcode==122) then + ! ascii is assumed - zcode is the ascii code of either lower or + ! upper case 'z'. + if (inta>=97 .and. inta<=122) inta = inta - 32 + if (intb>=97 .and. intb<=122) intb = intb - 32 + else if (zcode==233 .or. zcode==169) then + ! ebcdic is assumed - zcode is the ebcdic code of either lower or + ! upper case 'z'. + if (inta>=129 .and. inta<=137 .or.inta>=145 .and. inta<=153 .or.inta>=162 .and. & + inta<=169) inta = inta + 64 + if (intb>=129 .and. intb<=137 .or.intb>=145 .and. intb<=153 .or.intb>=162 .and. & + intb<=169) intb = intb + 64 + else if (zcode==218 .or. zcode==250) then + ! ascii is assumed, on prime machines - zcode is the ascii code + ! plus 128 of either lower or upper case 'z'. + if (inta>=225 .and. inta<=250) inta = inta - 32 + if (intb>=225 .and. intb<=250) intb = intb - 32 + end if + stdlib_lsame = inta == intb + ! return + end function stdlib_lsame + + !> SCABS1: computes |Re(.)| + |Im(.)| of a complex number + + pure real(sp) function stdlib_scabs1(z) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: z + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: abs,aimag,real + stdlib_scabs1 = abs(real(z,KIND=sp)) + abs(aimag(z)) + return + end function stdlib_scabs1 + + !> XERBLA: is an error handler for the LAPACK routines. + !> It is called by an LAPACK routine if an input parameter has an + !> invalid value. A message is printed and execution stops. + !> Installers may consider modifying the STOP statement in order to + !> call system-specific exception-handling facilities. + + pure subroutine stdlib_xerbla( srname, info ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character(len=*), intent(in) :: srname + integer(ilp), intent(in) :: info + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: len_trim + ! Executable Statements + 9999 format( ' ** ON ENTRY TO ', a, ' PARAMETER NUMBER ', i2, ' HAD ','AN ILLEGAL VALUE' ) + + end subroutine stdlib_xerbla + + !> XERBLA_ARRAY: assists other languages in calling XERBLA, the LAPACK + !> and BLAS error handler. Rather than taking a Fortran string argument + !> as the function's name, XERBLA_ARRAY takes an array of single + !> characters along with the array's length. XERBLA_ARRAY then copies + !> up to 32 characters of that array into a Fortran string and passes + !> that to XERBLA. If called with a non-positive SRNAME_LEN, + !> XERBLA_ARRAY will call XERBLA with a string of all blank characters. + !> Say some macro or other device makes XERBLA_ARRAY available to C99 + !> by a name lapack_xerbla and with a common Fortran calling convention. + !> Then a C99 program could invoke XERBLA via: + !> { + !> int flen = strlen(__func__); + !> lapack_xerbla(__func__, + !> } + !> Providing XERBLA_ARRAY is not necessary for intercepting LAPACK + !> errors. XERBLA_ARRAY calls XERBLA. + + pure subroutine stdlib_xerbla_array(srname_array, srname_len, info) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: srname_len, info + ! Array Arguments + character(1), intent(in) :: srname_array(srname_len) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i + ! Local Arrays + character*32 srname + ! Intrinsic Functions + intrinsic :: min,len + ! Executable Statements + srname = '' + do i = 1, min( srname_len, len( srname ) ) + srname( i:i ) = srname_array( i ) + end do + call stdlib_xerbla( srname, info ) + return + end subroutine stdlib_xerbla_array + +#:if WITH_QP + + !> DCABS1: computes |Re(.)| + |Im(.)| of a double complex number + + pure real(qp) function stdlib_qcabs1(z) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: z + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: abs,real,aimag + stdlib_qcabs1 = abs(real(z,KIND=qp)) + abs(aimag(z)) + return + end function stdlib_qcabs1 +#:endif + +#:if WITH_QP + + !> IDAMAX: finds the index of the first element having maximum absolute value. + + pure integer(ilp) function stdlib_iqamax(n,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(qp), intent(in) :: dx(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dmax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + stdlib_iqamax = 0 + if (n<1 .or. incx<=0) return + stdlib_iqamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = abs(dx(1)) + do i = 2,n + if (abs(dx(i))>dmax) then + stdlib_iqamax = i + dmax = abs(dx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = abs(dx(1)) + ix = ix + incx + do i = 2,n + if (abs(dx(ix))>dmax) then + stdlib_iqamax = i + dmax = abs(dx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_iqamax +#:endif + +#:if WITH_QP + + !> IZAMAX: finds the index of the first element having maximum |Re(.)| + |Im(.)| + + pure integer(ilp) function stdlib_iwamax(n,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dmax + integer(ilp) :: i, ix + stdlib_iwamax = 0 + if (n<1 .or. incx<=0) return + stdlib_iwamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = stdlib_qcabs1(zx(1)) + do i = 2,n + if (stdlib_qcabs1(zx(i))>dmax) then + stdlib_iwamax = i + dmax = stdlib_qcabs1(zx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = stdlib_qcabs1(zx(1)) + ix = ix + incx + do i = 2,n + if (stdlib_qcabs1(zx(ix))>dmax) then + stdlib_iwamax = i + dmax = stdlib_qcabs1(zx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_iwamax +#:endif + + !> ICAMAX: finds the index of the first element having maximum |Re(.)| + |Im(.)| + + pure integer(ilp) function stdlib_icamax(n,cx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: smax + integer(ilp) :: i, ix + stdlib_icamax = 0 + if (n<1 .or. incx<=0) return + stdlib_icamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + smax = stdlib_scabs1(cx(1)) + do i = 2,n + if (stdlib_scabs1(cx(i))>smax) then + stdlib_icamax = i + smax = stdlib_scabs1(cx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + smax = stdlib_scabs1(cx(1)) + ix = ix + incx + do i = 2,n + if (stdlib_scabs1(cx(ix))>smax) then + stdlib_icamax = i + smax = stdlib_scabs1(cx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_icamax + + !> IDAMAX: finds the index of the first element having maximum absolute value. + + pure integer(ilp) function stdlib_idamax(n,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(dp), intent(in) :: dx(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dmax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + stdlib_idamax = 0 + if (n<1 .or. incx<=0) return + stdlib_idamax = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = abs(dx(1)) + do i = 2,n + if (abs(dx(i))>dmax) then + stdlib_idamax = i + dmax = abs(dx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = abs(dx(1)) + ix = ix + incx + do i = 2,n + if (abs(dx(ix))>dmax) then + stdlib_idamax = i + dmax = abs(dx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_idamax + + + +end module stdlib_linalg_blas_aux diff --git a/src/stdlib_linalg_blas_c.fypp b/src/stdlib_linalg_blas_c.fypp new file mode 100644 index 000000000..68890f8d4 --- /dev/null +++ b/src/stdlib_linalg_blas_c.fypp @@ -0,0 +1,5061 @@ +#:include "common.fypp" +module stdlib_linalg_blas_c + use stdlib_linalg_constants + use stdlib_linalg_blas_aux + use stdlib_linalg_blas_s + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_caxpy + public :: stdlib_ccopy + public :: stdlib_cdotc + public :: stdlib_cdotu + public :: stdlib_cgbmv + public :: stdlib_cgemm + public :: stdlib_cgemv + public :: stdlib_cgerc + public :: stdlib_cgeru + public :: stdlib_chbmv + public :: stdlib_chemm + public :: stdlib_chemv + public :: stdlib_cher + public :: stdlib_cher2 + public :: stdlib_cher2k + public :: stdlib_cherk + public :: stdlib_chpmv + public :: stdlib_chpr + public :: stdlib_chpr2 + public :: stdlib_crotg + public :: stdlib_cscal + public :: stdlib_csrot + public :: stdlib_csscal + public :: stdlib_cswap + public :: stdlib_csymm + public :: stdlib_csyr2k + public :: stdlib_csyrk + public :: stdlib_ctbmv + public :: stdlib_ctbsv + public :: stdlib_ctpmv + public :: stdlib_ctpsv + public :: stdlib_ctrmm + public :: stdlib_ctrmv + public :: stdlib_ctrsm + public :: stdlib_ctrsv + + ! 32-bit real constants + real(sp), parameter, private :: negone = -1.00_sp + real(sp), parameter, private :: zero = 0.00_sp + real(sp), parameter, private :: half = 0.50_sp + real(sp), parameter, private :: one = 1.00_sp + real(sp), parameter, private :: two = 2.00_sp + real(sp), parameter, private :: three = 3.00_sp + real(sp), parameter, private :: four = 4.00_sp + real(sp), parameter, private :: eight = 8.00_sp + real(sp), parameter, private :: ten = 10.00_sp + + ! 32-bit complex constants + complex(sp), parameter, private :: czero = ( 0.0_sp,0.0_sp) + complex(sp), parameter, private :: chalf = ( 0.5_sp,0.0_sp) + complex(sp), parameter, private :: cone = ( 1.0_sp,0.0_sp) + complex(sp), parameter, private :: cnegone = (-1.0_sp,0.0_sp) + + ! 32-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(sp), parameter, private :: rradix = real(radix(zero),sp) + real(sp), parameter, private :: ulp = epsilon(zero) + real(sp), parameter, private :: eps = ulp*half + real(sp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(sp), parameter, private :: safmax = one/safmin + real(sp), parameter, private :: smlnum = safmin/ulp + real(sp), parameter, private :: bignum = safmax*ulp + real(sp), parameter, private :: rtmin = sqrt(smlnum) + real(sp), parameter, private :: rtmax = sqrt(bignum) + + ! 32-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(sp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(sp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(sp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(sp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> CAXPY: constant times a vector plus a vector. + + pure subroutine stdlib_caxpy(n,ca,cx,incx,cy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: ca + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + complex(sp), intent(inout) :: cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (stdlib_scabs1(ca)==0.0e+0_sp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + cy(i) = cy(i) + ca*cx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + cy(iy) = cy(iy) + ca*cx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_caxpy + + !> CCOPY: copies a vector x to a vector y. + + pure subroutine stdlib_ccopy(n,cx,incx,cy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + complex(sp), intent(out) :: cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + cy(i) = cx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + cy(iy) = cx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_ccopy + + !> CDOTC: forms the dot product of two complex vectors + !> CDOTC = X^H * Y + + pure complex(sp) function stdlib_cdotc(n,cx,incx,cy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(sp), intent(in) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + complex(sp) :: ctemp + integer(ilp) :: i, ix, iy + ! Intrinsic Functions + intrinsic :: conjg + ctemp = (0.0_sp,0.0_sp) + stdlib_cdotc = (0.0_sp,0.0_sp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ctemp = ctemp + conjg(cx(i))*cy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ctemp = ctemp + conjg(cx(ix))*cy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_cdotc = ctemp + return + end function stdlib_cdotc + + !> CDOTU: forms the dot product of two complex vectors + !> CDOTU = X^T * Y + + pure complex(sp) function stdlib_cdotu(n,cx,incx,cy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(sp), intent(in) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + complex(sp) :: ctemp + integer(ilp) :: i, ix, iy + ctemp = (0.0_sp,0.0_sp) + stdlib_cdotu = (0.0_sp,0.0_sp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ctemp = ctemp + cx(i)*cy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ctemp = ctemp + cx(ix)*cy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_cdotu = ctemp + return + end function stdlib_cdotu + + !> CGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_cgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('CGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==czero).and. (beta==cone))) return + noconj = stdlib_lsame(trans,'T') + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the band part of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_cgbmv + + !> CGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T or op( X ) = X**H, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_cgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: conja, conjb, nota, notb + + + ! set nota and notb as true if a and b respectively are not + ! conjugated or transposed, set conja and conjb as true if a and + ! b respectively are to be transposed but not conjugated and set + ! nrowa and nrowb as the number of rows of a and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + conja = stdlib_lsame(transa,'C') + conjb = stdlib_lsame(transb,'C') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.conja) .and.(.not.stdlib_lsame(transa,'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.conjb) .and.(.not.stdlib_lsame(transb,'T'))) & + then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda CGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_cgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_cgemv + + !> CGERC: performs the rank 1 operation + !> A := alpha*x*y**H + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_cgerc(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_cgerc + + !> CGERU: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_cgeru(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_cgeru + + !> CHBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian band matrix, with k super-diagonals. + + pure subroutine stdlib_chbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: conjg,max,min,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('CHBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(kplus1,j),KIND=sp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(kplus1,j),KIND=sp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(1,j),KIND=sp) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(1,j),KIND=sp) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_chbmv + + !> CHEMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is an hermitian matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_chemm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda CHEMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix. + + pure subroutine stdlib_chemv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(j,j),KIND=sp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(j,j),KIND=sp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(j,j),KIND=sp) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(j,j),KIND=sp) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_chemv + + !> CHER: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix. + + pure subroutine stdlib_cher(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda CHER2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n hermitian matrix. + + pure subroutine stdlib_cher2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + do i = 1,j - 1 + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + a(j,j) = real(a(j,j),KIND=sp) +real(x(j)*temp1+y(j)*temp2,KIND=sp) + + else + a(j,j) = real(a(j,j),KIND=sp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do i = 1,j - 1 + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + a(j,j) = real(a(j,j),KIND=sp) +real(x(jx)*temp1+y(jy)*temp2,KIND=sp) + + else + a(j,j) = real(a(j,j),KIND=sp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + a(j,j) = real(a(j,j),KIND=sp) +real(x(j)*temp1+y(j)*temp2,KIND=sp) + + do i = j + 1,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=sp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + a(j,j) = real(a(j,j),KIND=sp) +real(x(jx)*temp1+y(jy)*temp2,KIND=sp) + + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=sp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_cher2 + + !> CHER2K: performs one of the hermitian rank 2k operations + !> C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + !> or + !> C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + !> where alpha and beta are scalars with beta real, C is an n by n + !> hermitian matrix and A and B are n by k matrices in the first case + !> and k by n matrices in the second case. + + pure subroutine stdlib_cher2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + real(sp), intent(in) :: beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda CHERK: performs one of the hermitian rank k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n by n hermitian + !> matrix and A is an n by k matrix in the first case and a k by n + !> matrix in the second case. + + pure subroutine stdlib_cherk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: cmplx,conjg,max,real + ! Local Scalars + complex(sp) :: temp + real(sp) :: rtemp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda CHPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_chpmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: ap(*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: conjg,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('CHPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*real(ap(kk+j-1),KIND=sp) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(ap(kk+j-1),KIND=sp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(ap(kk),KIND=sp) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(ap(kk),KIND=sp) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_chpmv + + !> CHPR: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_chpr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! Intrinsic Functions + intrinsic :: conjg,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('CHPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==real(czero,KIND=sp))) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + real(x(j)*temp,KIND=sp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ix = kx + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + real(x(jx)*temp,KIND=sp) + + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + ap(kk) = real(ap(kk),KIND=sp) + real(temp*x(j),KIND=sp) + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=sp) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ap(kk) = real(ap(kk),KIND=sp) + real(temp*x(jx),KIND=sp) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + ap(k) = ap(k) + x(ix)*temp + end do + else + ap(kk) = real(ap(kk),KIND=sp) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_chpr + + !> CHPR2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_chpr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: conjg,real + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('CHPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==czero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) +real(x(j)*temp1+y(j)*temp2,& + KIND=sp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) +real(x(jx)*temp1+y(jy)*temp2,& + KIND=sp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=sp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + ap(kk) = real(ap(kk),KIND=sp) +real(x(j)*temp1+y(j)*temp2,KIND=sp) + + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=sp) + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ap(kk) = real(ap(kk),KIND=sp) +real(x(jx)*temp1+y(jy)*temp2,KIND=sp) + + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + end do + else + ap(kk) = real(ap(kk),KIND=sp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_chpr2 + + !> ! + !> + !> The computation uses the formulas + !> |x| = sqrt( Re(x)**2 + Im(x)**2 ) + !> sgn(x) = x / |x| if x /= 0 + !> = 1 if x = 0 + !> c = |a| / sqrt(|a|**2 + |b|**2) + !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) + !> When a and b are real and r /= 0, the formulas simplify to + !> r = sgn(a)*sqrt(|a|**2 + |b|**2) + !> c = a / r + !> s = b / r + !> the same as in SROTG when |a| > |b|. When |b| >= |a|, the + !> sign of c and s will be different from those computed by SROTG + !> if the signs of a and b are not the same. + + pure subroutine stdlib_crotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._sp) + ! Scaling Constants + ! Scalar Arguments + real(sp), intent(out) :: c + complex(sp), intent(inout) :: a + complex(sp), intent(in) :: b + complex(sp), intent(out) :: s + ! Local Scalars + real(sp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(sp) :: f, fs, g, gs, r, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(sp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=sp)**2 + aimag( t )**2 + ! Executable Statements + f = a + g = b + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=sp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=sp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=sp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + a = r + return + end subroutine stdlib_crotg + + !> CSCAL: scales a vector by a constant. + + pure subroutine stdlib_cscal(n,ca,cx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: ca + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(inout) :: cx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + cx(i) = ca*cx(i) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + cx(i) = ca*cx(i) + end do + end if + return + end subroutine stdlib_cscal + + !> CSROT: applies a plane rotation, where the cos and sin (c and s) are real + !> and the vectors cx and cy are complex. + !> jack dongarra, linpack, 3/11/78. + + pure subroutine stdlib_csrot( n, cx, incx, cy, incy, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(sp), intent(in) :: c, s + ! Array Arguments + complex(sp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(sp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 ) then + ! code for both increments equal to 1 + do i = 1, n + ctemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - s*cx( i ) + cx( i ) = ctemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - s*cx( ix ) + cx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_csrot + + !> CSSCAL: scales a complex vector by a real constant. + + pure subroutine stdlib_csscal(n,sa,cx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sa + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(inout) :: cx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + ! Intrinsic Functions + intrinsic :: aimag,cmplx,real + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + cx(i) = cmplx(sa*real(cx(i),KIND=sp),sa*aimag(cx(i)),KIND=sp) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + cx(i) = cmplx(sa*real(cx(i),KIND=sp),sa*aimag(cx(i)),KIND=sp) + end do + end if + return + end subroutine stdlib_csscal + + !> CSWAP: interchanges two vectors. + + pure subroutine stdlib_cswap(n,cx,incx,cy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(sp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + complex(sp) :: ctemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ctemp = cx(i) + cx(i) = cy(i) + cy(i) = ctemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ctemp = cx(ix) + cx(ix) = cy(iy) + cy(iy) = ctemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_cswap + + !> CSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_csymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda CSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_csyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda CSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_csyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda CTBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_ctbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('CTBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + end if + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + end if + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_ctbmv + + !> CTBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ctbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('CTBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=czero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=czero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_ctbsv + + !> CTPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_ctpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('CTPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk - 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j - 1,1,-1 + temp = temp + conjg(ap(k))*x(i) + k = k - 1 + end do + end if + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j + 1,n + temp = temp + conjg(ap(k))*x(i) + k = k + 1 + end do + end if + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_ctpmv + + !> CTPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ctpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('CTPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + if (noconj) then + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do i = 1,j - 1 + temp = temp - conjg(ap(k))*x(i) + k = k + 1 + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do k = kk,kk + j - 2 + temp = temp - conjg(ap(k))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + if (noconj) then + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do i = n,j + 1,-1 + temp = temp - conjg(ap(k))*x(i) + k = k - 1 + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do k = kk,kk - (n- (j+1)),-1 + temp = temp - conjg(ap(k))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_ctpsv + + !> CTRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ) + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + + pure subroutine stdlib_ctrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda CTRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_ctrmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda CTRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_ctrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda CTRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ctrsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(dp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(dp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(dp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(dp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> DASUM: takes the sum of the absolute values. + + pure real(dp) function stdlib_dasum(n,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(dp), intent(in) :: dx(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dtemp + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: abs,mod + stdlib_dasum = zero + dtemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,6) + if (m/=0) then + do i = 1,m + dtemp = dtemp + abs(dx(i)) + end do + if (n<6) then + stdlib_dasum = dtemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,6 + dtemp = dtemp + abs(dx(i)) + abs(dx(i+1)) +abs(dx(i+2)) + abs(dx(i+3)) +abs(dx(i+& + 4)) + abs(dx(i+5)) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + dtemp = dtemp + abs(dx(i)) + end do + end if + stdlib_dasum = dtemp + return + end function stdlib_dasum + + !> DAXPY: constant times a vector plus a vector. + !> uses unrolled loops for increments equal to one. + + pure subroutine stdlib_daxpy(n,da,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: da + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(in) :: dx(*) + real(dp), intent(inout) :: dy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (da==0.0_dp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,4) + if (m/=0) then + do i = 1,m + dy(i) = dy(i) + da*dx(i) + end do + end if + if (n<4) return + mp1 = m + 1 + do i = mp1,n,4 + dy(i) = dy(i) + da*dx(i) + dy(i+1) = dy(i+1) + da*dx(i+1) + dy(i+2) = dy(i+2) + da*dx(i+2) + dy(i+3) = dy(i+3) + da*dx(i+3) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_daxpy + + !> DCOPY: copies a vector, x, to a vector, y. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_dcopy(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(in) :: dx(*) + real(dp), intent(out) :: dy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,7) + if (m/=0) then + do i = 1,m + dy(i) = dx(i) + end do + if (n<7) return + end if + mp1 = m + 1 + do i = mp1,n,7 + dy(i) = dx(i) + dy(i+1) = dx(i+1) + dy(i+2) = dx(i+2) + dy(i+3) = dx(i+3) + dy(i+4) = dx(i+4) + dy(i+5) = dx(i+5) + dy(i+6) = dx(i+6) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dy(iy) = dx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_dcopy + + !> DDOT: forms the dot product of two vectors. + !> uses unrolled loops for increments equal to one. + + pure real(dp) function stdlib_ddot(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(in) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dtemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + stdlib_ddot = zero + dtemp = zero + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + dtemp = dtemp + dx(i)*dy(i) + end do + if (n<5) then + stdlib_ddot=dtemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,5 + dtemp = dtemp + dx(i)*dy(i) + dx(i+1)*dy(i+1) +dx(i+2)*dy(i+2) + dx(i+3)*dy(i+3) + & + dx(i+4)*dy(i+4) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = dtemp + dx(ix)*dy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_ddot = dtemp + return + end function stdlib_ddot + + !> DGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_dgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('DGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==zero).and. (beta==one))) return + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the band part of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_dgbmv + + !> DGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_dgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: nota, notb + + ! set nota and notb as true if a and b respectively are not + ! transposed and set nrowa and nrowb as the number of rows of a + ! and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.stdlib_lsame(transa,'C')) .and.(.not.stdlib_lsame(transa,& + 'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.stdlib_lsame(transb,'C')) .and.(.not.stdlib_lsame(& + transb,'T'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda DGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_dgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_dgemv + + !> DGER: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_dger(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_dger + + !> ! + !> + !> DNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> DNRM2 := sqrt( x'*x ) + + pure function stdlib_dnrm2( n, x, incx ) + real(dp) :: stdlib_dnrm2 + ! -- reference blas level1 routine (version 3.9.1_dp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._dp) + real(dp), parameter :: maxn = huge(0.0_dp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(dp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(dp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_dnrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(x(ix)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_dnrm2 = scl*sqrt( sumsq ) + return + end function stdlib_dnrm2 + + !> DROT: applies a plane rotation. + + pure subroutine stdlib_drot(n,dx,incx,dy,incy,c,s) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: c, s + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dtemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + dtemp = c*dx(i) + s*dy(i) + dy(i) = c*dy(i) - s*dx(i) + dx(i) = dtemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = c*dx(ix) + s*dy(iy) + dy(iy) = c*dy(iy) - s*dx(ix) + dx(ix) = dtemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_drot + + !> ! + !> + !> The computation uses the formulas + !> sigma = sgn(a) if |a| > |b| + !> = sgn(b) if |b| >= |a| + !> r = sigma*sqrt( a**2 + b**2 ) + !> c = 1; s = 0 if r = 0 + !> c = a/r; s = b/r if r != 0 + !> The subroutine also computes + !> z = s if |a| > |b|, + !> = 1/c if |b| >= |a| and c != 0 + !> = 1 if c = 0 + !> This allows c and s to be reconstructed from z as follows: + !> If z = 1, set c = 0, s = 1. + !> If |z| < 1, set c = sqrt(1 - z**2) and s = z. + !> If |z| > 1, set c = 1/z and s = sqrt( 1 - c**2). + + pure subroutine stdlib_drotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._dp) + ! Scaling Constants + ! Scalar Arguments + real(dp), intent(inout) :: a, b + real(dp), intent(out) :: c, s + ! Local Scalars + real(dp) :: anorm, bnorm, scl, sigma, r, z + anorm = abs(a) + bnorm = abs(b) + if( bnorm == zero ) then + c = one + s = zero + b = zero + else if( anorm == zero ) then + c = zero + s = one + a = b + b = one + else + scl = min( safmax, max( safmin, anorm, bnorm ) ) + if( anorm > bnorm ) then + sigma = sign(one,a) + else + sigma = sign(one,b) + end if + r = sigma*( scl*sqrt((a/scl)**2 + (b/scl)**2) ) + c = a/r + s = b/r + if( anorm > bnorm ) then + z = s + else if( c /= zero ) then + z = one/c + else + z = one + end if + a = r + b = z + end if + return + end subroutine stdlib_drotg + + !> APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX + !> (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN + !> (DY**T) + !> DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX >= 0, ELSE + !> LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._dp DFLAG=0._dp DFLAG=1._dp DFLAG=-2.D0 + !> (DH11 DH12) (1._dp DH12) (DH11 1._dp) (1._dp 0._dp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._dp), (-1._dp DH22), (0._dp 1._dp). + !> SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. + + pure subroutine stdlib_drotm(n,dx,incx,dy,incy,dparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(in) :: dparam(5) + real(dp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dflag, dh11, dh12, dh21, dh22, two, w, z, zero + integer(ilp) :: i, kx, ky, nsteps + ! Data Statements + zero = 0.0_dp + two = 2.0_dp + dflag = dparam(1) + if (n<=0 .or. (dflag+two==zero)) return + if (incx==incy.and.incx>0) then + nsteps = n*incx + if (dflag CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS + !> THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(DD1)*DX1,SQRT(DD2) DY2)**T. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._dp DFLAG=0._dp DFLAG=1._dp DFLAG=-2.D0 + !> (DH11 DH12) (1._dp DH12) (DH11 1._dp) (1._dp 0._dp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._dp), (-1._dp DH22), (0._dp 1._dp). + !> LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 + !> RESPECTIVELY. (VALUES OF 1._dp, -1._dp, OR 0._dp IMPLIED BY THE + !> VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) + !> THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE + !> INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE + !> OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. + + pure subroutine stdlib_drotmg(dd1,dd2,dx1,dy1,dparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(inout) :: dd1, dd2, dx1 + real(dp), intent(in) :: dy1 + ! Array Arguments + real(dp), intent(out) :: dparam(5) + ! ===================================================================== + ! Local Scalars + real(dp) :: dflag, dh11, dh12, dh21, dh22, dp1, dp2, dq1, dq2, dtemp, du, gam, gamsq, & + one, rgamsq, two, zero + ! Intrinsic Functions + intrinsic :: abs + ! Data Statements + zero = 0.0_dp + one = 1.0_dp + two = 2.0_dp + gam = 4096.0_dp + gamsq = 16777216.0_dp + rgamsq = 5.9604645e-8_dp + if (dd1abs(dq2)) then + dh21 = -dy1/dx1 + dh12 = dp2/dp1 + du = one - dh12*dh21 + if (du>zero) then + dflag = zero + dd1 = dd1/du + dd2 = dd2/du + dx1 = dx1*du + else + ! this code path if here for safety. we do not expect this + ! condition to ever hold except in edge cases with rounding + ! errors. see doi: 10.1145/355841.355847 + dflag = -one + dh11 = zero + dh12 = zero + dh21 = zero + dh22 = zero + dd1 = zero + dd2 = zero + dx1 = zero + end if + else + if (dq2=gamsq)) + if (dflag==zero) then + dh11 = one + dh22 = one + dflag = -one + else + dh21 = -one + dh12 = one + dflag = -one + end if + if (dd1<=rgamsq) then + dd1 = dd1*gam**2 + dx1 = dx1/gam + dh11 = dh11/gam + dh12 = dh12/gam + else + dd1 = dd1/gam**2 + dx1 = dx1*gam + dh11 = dh11*gam + dh12 = dh12*gam + end if + enddo + end if + if (dd2/=zero) then + do while ( (abs(dd2)<=rgamsq) .or. (abs(dd2)>=gamsq) ) + if (dflag==zero) then + dh11 = one + dh22 = one + dflag = -one + else + dh21 = -one + dh12 = one + dflag = -one + end if + if (abs(dd2)<=rgamsq) then + dd2 = dd2*gam**2 + dh21 = dh21/gam + dh22 = dh22/gam + else + dd2 = dd2/gam**2 + dh21 = dh21*gam + dh22 = dh22*gam + end if + end do + end if + end if + if (dflag DSBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric band matrix, with k super-diagonals. + + pure subroutine stdlib_dsbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('DSBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + temp1*a(kplus1,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(kplus1,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(1,j) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(1,j) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_dsbmv + + !> DSCAL: scales a vector by a constant. + !> uses unrolled loops for increment equal to 1. + + pure subroutine stdlib_dscal(n,da,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: da + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(dp), intent(inout) :: dx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: mod + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + dx(i) = da*dx(i) + end do + if (n<5) return + end if + mp1 = m + 1 + do i = mp1,n,5 + dx(i) = da*dx(i) + dx(i+1) = da*dx(i+1) + dx(i+2) = da*dx(i+2) + dx(i+3) = da*dx(i+3) + dx(i+4) = da*dx(i+4) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + dx(i) = da*dx(i) + end do + end if + return + end subroutine stdlib_dscal + + !> Compute the inner product of two vectors with extended + !> precision accumulation and result. + !> Returns D.P. dot product accumulated in D.P., for S.P. SX and SY + !> DSDOT: = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), + !> where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, and LY is + !> defined in a similar way using INCY. + + pure real(dp) function stdlib_dsdot(n,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sx(*), sy(*) + ! authors: + ! ======== + ! lawson, c. l., (jpl), hanson, r. j., (snla), + ! kincaid, d. r., (u. of texas), krogh, f. t., (jpl) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, kx, ky, ns + ! Intrinsic Functions + intrinsic :: real + stdlib_dsdot = zero + if (n<=0) return + if (incx==incy .and. incx>0) then + ! code for equal, positive, non-unit increments. + ns = n*incx + do i = 1,ns,incx + stdlib_dsdot = stdlib_dsdot + real(sx(i),KIND=dp)*real(sy(i),KIND=dp) + end do + else + ! code for unequal or nonpositive increments. + kx = 1 + ky = 1 + if (incx<0) kx = 1 + (1-n)*incx + if (incy<0) ky = 1 + (1-n)*incy + do i = 1,n + stdlib_dsdot = stdlib_dsdot + real(sx(kx),KIND=dp)*real(sy(ky),KIND=dp) + kx = kx + incx + ky = ky + incy + end do + end if + return + end function stdlib_dsdot + + !> DSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_dspmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(in) :: ap(*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DSPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*ap(kk+j-1) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*ap(kk+j-1) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*ap(kk) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*ap(kk) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_dspmv + + !> DSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_dspr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(inout) :: ap(*) + real(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('DSPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = kx + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = jx + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_dspr + + !> DSPR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_dspr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(inout) :: ap(*) + real(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DSPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_dspr2 + + !> DSWAP: interchanges two vectors. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_dswap(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dtemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,3) + if (m/=0) then + do i = 1,m + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + end do + if (n<3) return + end if + mp1 = m + 1 + do i = mp1,n,3 + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + dtemp = dx(i+1) + dx(i+1) = dy(i+1) + dy(i+1) = dtemp + dtemp = dx(i+2) + dx(i+2) = dy(i+2) + dy(i+2) = dtemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = dx(ix) + dx(ix) = dy(iy) + dy(iy) = dtemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_dswap + + !> DSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_dsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_dsymv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + temp1*a(j,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(j,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(j,j) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(j,j) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_dsymv + + !> DSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_dsyr(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda DSYR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n symmetric matrix. + + pure subroutine stdlib_dsyr2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = 1,j + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do i = 1,j + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = j,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do i = j,n + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_dsyr2 + + !> DSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_dsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda DSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_dsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda DTBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_dtbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DTBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_dtbmv + + !> DTBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_dtbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DTBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=zero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=zero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t)*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_dtbsv + + !> DTPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_dtpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DTPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk - 1 + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk + 1 + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_dtpmv + + !> DTPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_dtpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DTPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_dtpsv + + !> DTRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ), + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + + pure subroutine stdlib_dtrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda DTRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_dtrmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DTRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_dtrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda DTRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_dtrsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DZASUM: takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and + !> returns a double precision result. + + pure real(dp) function stdlib_dzasum(n,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: stemp + integer(ilp) :: i, nincx + stdlib_dzasum = zero + stemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + stemp = stemp + stdlib_dcabs1(zx(i)) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + stemp = stemp + stdlib_dcabs1(zx(i)) + end do + end if + stdlib_dzasum = stemp + return + end function stdlib_dzasum + + !> ! + !> + !> DZNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> DZNRM2 := sqrt( x**H*x ) + + pure function stdlib_dznrm2( n, x, incx ) + real(dp) :: stdlib_dznrm2 + ! -- reference blas level1 routine (version 3.9.1_dp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._dp) + real(dp), parameter :: maxn = huge(0.0_dp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(dp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_dznrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=dp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_dznrm2 = scl*sqrt( sumsq ) + return + end function stdlib_dznrm2 + + + +end module stdlib_linalg_blas_d diff --git a/src/stdlib_linalg_blas_q.fypp b/src/stdlib_linalg_blas_q.fypp new file mode 100644 index 000000000..b7209fd35 --- /dev/null +++ b/src/stdlib_linalg_blas_q.fypp @@ -0,0 +1,4524 @@ +#:include "common.fypp" +#:if WITH_QP +module stdlib_linalg_blas_q + use stdlib_linalg_constants + use stdlib_linalg_blas_aux + use stdlib_linalg_blas_s + use stdlib_linalg_blas_c + use stdlib_linalg_blas_d + use stdlib_linalg_blas_z + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_qasum + public :: stdlib_qaxpy + public :: stdlib_qcopy + public :: stdlib_qdot + public :: stdlib_qgbmv + public :: stdlib_qgemm + public :: stdlib_qgemv + public :: stdlib_qger + public :: stdlib_qnrm2 + public :: stdlib_qrot + public :: stdlib_qrotg + public :: stdlib_qrotm + public :: stdlib_qrotmg + public :: stdlib_qsbmv + public :: stdlib_qscal + public :: stdlib_qsdot + public :: stdlib_qspmv + public :: stdlib_qspr + public :: stdlib_qspr2 + public :: stdlib_qswap + public :: stdlib_qsymm + public :: stdlib_qsymv + public :: stdlib_qsyr + public :: stdlib_qsyr2 + public :: stdlib_qsyr2k + public :: stdlib_qsyrk + public :: stdlib_qtbmv + public :: stdlib_qtbsv + public :: stdlib_qtpmv + public :: stdlib_qtpsv + public :: stdlib_qtrmm + public :: stdlib_qtrmv + public :: stdlib_qtrsm + public :: stdlib_qtrsv + public :: stdlib_qzasum + public :: stdlib_qznrm2 + + ! 128-bit real constants + real(qp), parameter, private :: negone = -1.00_qp + real(qp), parameter, private :: zero = 0.00_qp + real(qp), parameter, private :: half = 0.50_qp + real(qp), parameter, private :: one = 1.00_qp + real(qp), parameter, private :: two = 2.00_qp + real(qp), parameter, private :: three = 3.00_qp + real(qp), parameter, private :: four = 4.00_qp + real(qp), parameter, private :: eight = 8.00_qp + real(qp), parameter, private :: ten = 10.00_qp + + ! 128-bit complex constants + complex(qp), parameter, private :: czero = ( 0.0_qp,0.0_qp) + complex(qp), parameter, private :: chalf = ( 0.5_qp,0.0_qp) + complex(qp), parameter, private :: cone = ( 1.0_qp,0.0_qp) + complex(qp), parameter, private :: cnegone = (-1.0_qp,0.0_qp) + + ! 128-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(qp), parameter, private :: rradix = real(radix(zero),qp) + real(qp), parameter, private :: ulp = epsilon(zero) + real(qp), parameter, private :: eps = ulp*half + real(qp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(qp), parameter, private :: safmax = one/safmin + real(qp), parameter, private :: smlnum = safmin/ulp + real(qp), parameter, private :: bignum = safmax*ulp + real(qp), parameter, private :: rtmin = sqrt(smlnum) + real(qp), parameter, private :: rtmax = sqrt(bignum) + + ! 128-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(qp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(qp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(qp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(qp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> DASUM: takes the sum of the absolute values. + + pure real(qp) function stdlib_qasum(n,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(qp), intent(in) :: dx(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dtemp + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: abs,mod + stdlib_qasum = zero + dtemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,6) + if (m/=0) then + do i = 1,m + dtemp = dtemp + abs(dx(i)) + end do + if (n<6) then + stdlib_qasum = dtemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,6 + dtemp = dtemp + abs(dx(i)) + abs(dx(i+1)) +abs(dx(i+2)) + abs(dx(i+3)) +abs(dx(i+& + 4)) + abs(dx(i+5)) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + dtemp = dtemp + abs(dx(i)) + end do + end if + stdlib_qasum = dtemp + return + end function stdlib_qasum + + !> DAXPY: constant times a vector plus a vector. + !> uses unrolled loops for increments equal to one. + + pure subroutine stdlib_qaxpy(n,da,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: da + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(in) :: dx(*) + real(qp), intent(inout) :: dy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (da==0.0_qp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,4) + if (m/=0) then + do i = 1,m + dy(i) = dy(i) + da*dx(i) + end do + end if + if (n<4) return + mp1 = m + 1 + do i = mp1,n,4 + dy(i) = dy(i) + da*dx(i) + dy(i+1) = dy(i+1) + da*dx(i+1) + dy(i+2) = dy(i+2) + da*dx(i+2) + dy(i+3) = dy(i+3) + da*dx(i+3) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_qaxpy + + !> DCOPY: copies a vector, x, to a vector, y. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_qcopy(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(in) :: dx(*) + real(qp), intent(out) :: dy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,7) + if (m/=0) then + do i = 1,m + dy(i) = dx(i) + end do + if (n<7) return + end if + mp1 = m + 1 + do i = mp1,n,7 + dy(i) = dx(i) + dy(i+1) = dx(i+1) + dy(i+2) = dx(i+2) + dy(i+3) = dx(i+3) + dy(i+4) = dx(i+4) + dy(i+5) = dx(i+5) + dy(i+6) = dx(i+6) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dy(iy) = dx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_qcopy + + !> DDOT: forms the dot product of two vectors. + !> uses unrolled loops for increments equal to one. + + pure real(qp) function stdlib_qdot(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(in) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dtemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + stdlib_qdot = zero + dtemp = zero + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + dtemp = dtemp + dx(i)*dy(i) + end do + if (n<5) then + stdlib_qdot=dtemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,5 + dtemp = dtemp + dx(i)*dy(i) + dx(i+1)*dy(i+1) +dx(i+2)*dy(i+2) + dx(i+3)*dy(i+3) + & + dx(i+4)*dy(i+4) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = dtemp + dx(ix)*dy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_qdot = dtemp + return + end function stdlib_qdot + + !> DGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_qgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('DGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==zero).and. (beta==one))) return + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the band part of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_qgbmv + + !> DGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_qgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: nota, notb + + ! set nota and notb as true if a and b respectively are not + ! transposed and set nrowa and nrowb as the number of rows of a + ! and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.stdlib_lsame(transa,'C')) .and.(.not.stdlib_lsame(transa,& + 'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.stdlib_lsame(transb,'C')) .and.(.not.stdlib_lsame(& + transb,'T'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda DGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_qgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_qgemv + + !> DGER: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_qger(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_qger + + !> ! + !> + !> DNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> DNRM2 := sqrt( x'*x ) + + pure function stdlib_qnrm2( n, x, incx ) + real(qp) :: stdlib_qnrm2 + ! -- reference blas level1 routine (version 3.9.1_qp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._qp) + real(qp), parameter :: maxn = huge(0.0_qp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(qp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(qp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_qnrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(x(ix)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_qnrm2 = scl*sqrt( sumsq ) + return + end function stdlib_qnrm2 + + !> DROT: applies a plane rotation. + + pure subroutine stdlib_qrot(n,dx,incx,dy,incy,c,s) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: c, s + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dtemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + dtemp = c*dx(i) + s*dy(i) + dy(i) = c*dy(i) - s*dx(i) + dx(i) = dtemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = c*dx(ix) + s*dy(iy) + dy(iy) = c*dy(iy) - s*dx(ix) + dx(ix) = dtemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_qrot + + !> ! + !> + !> The computation uses the formulas + !> sigma = sgn(a) if |a| > |b| + !> = sgn(b) if |b| >= |a| + !> r = sigma*sqrt( a**2 + b**2 ) + !> c = 1; s = 0 if r = 0 + !> c = a/r; s = b/r if r != 0 + !> The subroutine also computes + !> z = s if |a| > |b|, + !> = 1/c if |b| >= |a| and c != 0 + !> = 1 if c = 0 + !> This allows c and s to be reconstructed from z as follows: + !> If z = 1, set c = 0, s = 1. + !> If |z| < 1, set c = sqrt(1 - z**2) and s = z. + !> If |z| > 1, set c = 1/z and s = sqrt( 1 - c**2). + + pure subroutine stdlib_qrotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._qp) + ! Scaling Constants + ! Scalar Arguments + real(qp), intent(inout) :: a, b + real(qp), intent(out) :: c, s + ! Local Scalars + real(qp) :: anorm, bnorm, scl, sigma, r, z + anorm = abs(a) + bnorm = abs(b) + if( bnorm == zero ) then + c = one + s = zero + b = zero + else if( anorm == zero ) then + c = zero + s = one + a = b + b = one + else + scl = min( safmax, max( safmin, anorm, bnorm ) ) + if( anorm > bnorm ) then + sigma = sign(one,a) + else + sigma = sign(one,b) + end if + r = sigma*( scl*sqrt((a/scl)**2 + (b/scl)**2) ) + c = a/r + s = b/r + if( anorm > bnorm ) then + z = s + else if( c /= zero ) then + z = one/c + else + z = one + end if + a = r + b = z + end if + return + end subroutine stdlib_qrotg + + !> APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX + !> (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN + !> (DY**T) + !> DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX >= 0, ELSE + !> LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._qp DFLAG=0._qp DFLAG=1._qp DFLAG=-2.D0 + !> (DH11 DH12) (1._qp DH12) (DH11 1._qp) (1._qp 0._qp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._qp), (-1._qp DH22), (0._qp 1._qp). + !> SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. + + pure subroutine stdlib_qrotm(n,dx,incx,dy,incy,dparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(in) :: dparam(5) + real(qp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dflag, dh11, dh12, dh21, dh22, two, w, z, zero + integer(ilp) :: i, kx, ky, nsteps + ! Data Statements + zero = 0.0_qp + two = 2.0_qp + dflag = dparam(1) + if (n<=0 .or. (dflag+two==zero)) return + if (incx==incy.and.incx>0) then + nsteps = n*incx + if (dflag CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS + !> THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(DD1)*DX1,SQRT(DD2) DY2)**T. + !> WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> DFLAG=-1._qp DFLAG=0._qp DFLAG=1._qp DFLAG=-2.D0 + !> (DH11 DH12) (1._qp DH12) (DH11 1._qp) (1._qp 0._qp) + !> H=( ) ( ) ( ) ( ) + !> (DH21 DH22), (DH21 1._qp), (-1._qp DH22), (0._qp 1._qp). + !> LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 + !> RESPECTIVELY. (VALUES OF 1._qp, -1._qp, OR 0._qp IMPLIED BY THE + !> VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) + !> THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE + !> INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE + !> OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. + + pure subroutine stdlib_qrotmg(dd1,dd2,dx1,dy1,dparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(inout) :: dd1, dd2, dx1 + real(qp), intent(in) :: dy1 + ! Array Arguments + real(qp), intent(out) :: dparam(5) + ! ===================================================================== + ! Local Scalars + real(qp) :: dflag, dh11, dh12, dh21, dh22, dp1, dp2, dq1, dq2, dtemp, du, gam, gamsq, & + one, rgamsq, two, zero + ! Intrinsic Functions + intrinsic :: abs + ! Data Statements + zero = 0.0_qp + one = 1.0_qp + two = 2.0_qp + gam = 4096.0_qp + gamsq = 16777216.0_qp + rgamsq = 5.9604645e-8_qp + if (dd1abs(dq2)) then + dh21 = -dy1/dx1 + dh12 = dp2/dp1 + du = one - dh12*dh21 + if (du>zero) then + dflag = zero + dd1 = dd1/du + dd2 = dd2/du + dx1 = dx1*du + else + ! this code path if here for safety. we do not expect this + ! condition to ever hold except in edge cases with rounding + ! errors. see doi: 10.1145/355841.355847 + dflag = -one + dh11 = zero + dh12 = zero + dh21 = zero + dh22 = zero + dd1 = zero + dd2 = zero + dx1 = zero + end if + else + if (dq2=gamsq)) + if (dflag==zero) then + dh11 = one + dh22 = one + dflag = -one + else + dh21 = -one + dh12 = one + dflag = -one + end if + if (dd1<=rgamsq) then + dd1 = dd1*gam**2 + dx1 = dx1/gam + dh11 = dh11/gam + dh12 = dh12/gam + else + dd1 = dd1/gam**2 + dx1 = dx1*gam + dh11 = dh11*gam + dh12 = dh12*gam + end if + enddo + end if + if (dd2/=zero) then + do while ( (abs(dd2)<=rgamsq) .or. (abs(dd2)>=gamsq) ) + if (dflag==zero) then + dh11 = one + dh22 = one + dflag = -one + else + dh21 = -one + dh12 = one + dflag = -one + end if + if (abs(dd2)<=rgamsq) then + dd2 = dd2*gam**2 + dh21 = dh21/gam + dh22 = dh22/gam + else + dd2 = dd2/gam**2 + dh21 = dh21*gam + dh22 = dh22*gam + end if + end do + end if + end if + if (dflag DSBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric band matrix, with k super-diagonals. + + pure subroutine stdlib_qsbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('DSBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + temp1*a(kplus1,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(kplus1,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(1,j) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(1,j) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_qsbmv + + !> DSCAL: scales a vector by a constant. + !> uses unrolled loops for increment equal to 1. + + pure subroutine stdlib_qscal(n,da,dx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: da + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(qp), intent(inout) :: dx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: mod + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + dx(i) = da*dx(i) + end do + if (n<5) return + end if + mp1 = m + 1 + do i = mp1,n,5 + dx(i) = da*dx(i) + dx(i+1) = da*dx(i+1) + dx(i+2) = da*dx(i+2) + dx(i+3) = da*dx(i+3) + dx(i+4) = da*dx(i+4) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + dx(i) = da*dx(i) + end do + end if + return + end subroutine stdlib_qscal + + !> Compute the inner product of two vectors with extended + !> precision accumulation and result. + !> Returns D.P. dot product accumulated in D.P., for S.P. SX and SY + !> DSDOT: = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), + !> where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, and LY is + !> defined in a similar way using INCY. + + pure real(qp) function stdlib_qsdot(n,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(dp), intent(in) :: sx(*), sy(*) + ! authors: + ! ======== + ! lawson, c. l., (jpl), hanson, r. j., (snla), + ! kincaid, d. r., (u. of texas), krogh, f. t., (jpl) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, kx, ky, ns + ! Intrinsic Functions + intrinsic :: real + stdlib_qsdot = zero + if (n<=0) return + if (incx==incy .and. incx>0) then + ! code for equal, positive, non-unit increments. + ns = n*incx + do i = 1,ns,incx + stdlib_qsdot = stdlib_qsdot + real(sx(i),KIND=qp)*real(sy(i),KIND=qp) + end do + else + ! code for unequal or nonpositive increments. + kx = 1 + ky = 1 + if (incx<0) kx = 1 + (1-n)*incx + if (incy<0) ky = 1 + (1-n)*incy + do i = 1,n + stdlib_qsdot = stdlib_qsdot + real(sx(kx),KIND=qp)*real(sy(ky),KIND=qp) + kx = kx + incx + ky = ky + incy + end do + end if + return + end function stdlib_qsdot + + !> DSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_qspmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(in) :: ap(*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DSPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*ap(kk+j-1) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*ap(kk+j-1) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*ap(kk) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*ap(kk) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_qspmv + + !> DSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_qspr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(inout) :: ap(*) + real(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('DSPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = kx + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = jx + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_qspr + + !> DSPR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_qspr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(inout) :: ap(*) + real(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DSPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_qspr2 + + !> DSWAP: interchanges two vectors. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_qswap(n,dx,incx,dy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(qp), intent(inout) :: dx(*), dy(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dtemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,3) + if (m/=0) then + do i = 1,m + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + end do + if (n<3) return + end if + mp1 = m + 1 + do i = mp1,n,3 + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + dtemp = dx(i+1) + dx(i+1) = dy(i+1) + dy(i+1) = dtemp + dtemp = dx(i+2) + dx(i+2) = dy(i+2) + dy(i+2) = dtemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + dtemp = dx(ix) + dx(ix) = dy(iy) + dy(iy) = dtemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_qswap + + !> DSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_qsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_qsymv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + temp1*a(j,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(j,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(j,j) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(j,j) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_qsymv + + !> DSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_qsyr(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda DSYR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n symmetric matrix. + + pure subroutine stdlib_qsyr2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = 1,j + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do i = 1,j + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = j,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do i = j,n + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_qsyr2 + + !> DSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_qsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda DSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_qsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda DTBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_qtbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DTBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_qtbmv + + !> DTBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_qtbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('DTBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=zero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=zero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t)*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_qtbsv + + !> DTPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_qtpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: ap(*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DTPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk - 1 + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk + 1 + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_qtpmv + + !> DTPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_qtpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: ap(*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('DTPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_qtpsv + + !> DTRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ), + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + + pure subroutine stdlib_qtrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda DTRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_qtrmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DTRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_qtrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda DTRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_qtrsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda DZASUM: takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and + !> returns a quad precision result. + + pure real(qp) function stdlib_qzasum(n,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: stemp + integer(ilp) :: i, nincx + stdlib_qzasum = zero + stemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + stemp = stemp + stdlib_qcabs1(zx(i)) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + stemp = stemp + stdlib_qcabs1(zx(i)) + end do + end if + stdlib_qzasum = stemp + return + end function stdlib_qzasum + + !> ! + !> + !> DZNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> DZNRM2 := sqrt( x**H*x ) + + pure function stdlib_qznrm2( n, x, incx ) + real(qp) :: stdlib_qznrm2 + ! -- reference blas level1 routine (version 3.9.1_qp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._qp) + real(qp), parameter :: maxn = huge(0.0_qp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(qp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_qznrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=qp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_qznrm2 = scl*sqrt( sumsq ) + return + end function stdlib_qznrm2 + + + +end module stdlib_linalg_blas_q +#:endif diff --git a/src/stdlib_linalg_blas_s.fypp b/src/stdlib_linalg_blas_s.fypp new file mode 100644 index 000000000..504af9d3a --- /dev/null +++ b/src/stdlib_linalg_blas_s.fypp @@ -0,0 +1,4522 @@ +#:include "common.fypp" +module stdlib_linalg_blas_s + use stdlib_linalg_constants + use stdlib_linalg_blas_aux + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_sasum + public :: stdlib_saxpy + public :: stdlib_scasum + public :: stdlib_scnrm2 + public :: stdlib_scopy + public :: stdlib_sdot + public :: stdlib_sdsdot + public :: stdlib_sgbmv + public :: stdlib_sgemm + public :: stdlib_sgemv + public :: stdlib_sger + public :: stdlib_snrm2 + public :: stdlib_srot + public :: stdlib_srotg + public :: stdlib_srotm + public :: stdlib_srotmg + public :: stdlib_ssbmv + public :: stdlib_sscal + public :: stdlib_sspmv + public :: stdlib_sspr + public :: stdlib_sspr2 + public :: stdlib_sswap + public :: stdlib_ssymm + public :: stdlib_ssymv + public :: stdlib_ssyr + public :: stdlib_ssyr2 + public :: stdlib_ssyr2k + public :: stdlib_ssyrk + public :: stdlib_stbmv + public :: stdlib_stbsv + public :: stdlib_stpmv + public :: stdlib_stpsv + public :: stdlib_strmm + public :: stdlib_strmv + public :: stdlib_strsm + public :: stdlib_strsv + + ! 32-bit real constants + real(sp), parameter, private :: negone = -1.00_sp + real(sp), parameter, private :: zero = 0.00_sp + real(sp), parameter, private :: half = 0.50_sp + real(sp), parameter, private :: one = 1.00_sp + real(sp), parameter, private :: two = 2.00_sp + real(sp), parameter, private :: three = 3.00_sp + real(sp), parameter, private :: four = 4.00_sp + real(sp), parameter, private :: eight = 8.00_sp + real(sp), parameter, private :: ten = 10.00_sp + + ! 32-bit complex constants + complex(sp), parameter, private :: czero = ( 0.0_sp,0.0_sp) + complex(sp), parameter, private :: chalf = ( 0.5_sp,0.0_sp) + complex(sp), parameter, private :: cone = ( 1.0_sp,0.0_sp) + complex(sp), parameter, private :: cnegone = (-1.0_sp,0.0_sp) + + ! 32-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(sp), parameter, private :: rradix = real(radix(zero),sp) + real(sp), parameter, private :: ulp = epsilon(zero) + real(sp), parameter, private :: eps = ulp*half + real(sp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(sp), parameter, private :: safmax = one/safmin + real(sp), parameter, private :: smlnum = safmin/ulp + real(sp), parameter, private :: bignum = safmax*ulp + real(sp), parameter, private :: rtmin = sqrt(smlnum) + real(sp), parameter, private :: rtmax = sqrt(bignum) + + ! 32-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(sp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(sp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(sp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(sp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> SASUM: takes the sum of the absolute values. + !> uses unrolled loops for increment equal to one. + + pure real(sp) function stdlib_sasum(n,sx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(sp), intent(in) :: sx(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: stemp + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: abs,mod + stdlib_sasum = zero + stemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,6) + if (m/=0) then + do i = 1,m + stemp = stemp + abs(sx(i)) + end do + if (n<6) then + stdlib_sasum = stemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,6 + stemp = stemp + abs(sx(i)) + abs(sx(i+1)) +abs(sx(i+2)) + abs(sx(i+3)) +abs(sx(i+& + 4)) + abs(sx(i+5)) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + stemp = stemp + abs(sx(i)) + end do + end if + stdlib_sasum = stemp + return + end function stdlib_sasum + + !> SAXPY: constant times a vector plus a vector. + !> uses unrolled loops for increments equal to one. + + pure subroutine stdlib_saxpy(n,sa,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sa + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sx(*) + real(sp), intent(inout) :: sy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (sa==0.0_sp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,4) + if (m/=0) then + do i = 1,m + sy(i) = sy(i) + sa*sx(i) + end do + end if + if (n<4) return + mp1 = m + 1 + do i = mp1,n,4 + sy(i) = sy(i) + sa*sx(i) + sy(i+1) = sy(i+1) + sa*sx(i+1) + sy(i+2) = sy(i+2) + sa*sx(i+2) + sy(i+3) = sy(i+3) + sa*sx(i+3) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + sy(iy) = sy(iy) + sa*sx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_saxpy + + !> SCASUM: takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and + !> returns a single precision result. + + pure real(sp) function stdlib_scasum(n,cx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: stemp + integer(ilp) :: i, nincx + ! Intrinsic Functions + intrinsic :: abs,aimag,real + stdlib_scasum = zero + stemp = zero + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + stemp = stemp + abs(real(cx(i),KIND=sp)) + abs(aimag(cx(i))) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + stemp = stemp + abs(real(cx(i),KIND=sp)) + abs(aimag(cx(i))) + end do + end if + stdlib_scasum = stemp + return + end function stdlib_scasum + + !> ! + !> + !> SCNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> SCNRM2 := sqrt( x**H*x ) + + pure function stdlib_scnrm2( n, x, incx ) + real(sp) :: stdlib_scnrm2 + ! -- reference blas level1 routine (version 3.9.1_sp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._sp) + real(sp), parameter :: maxn = huge(0.0_sp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(sp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_scnrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=sp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_scnrm2 = scl*sqrt( sumsq ) + return + end function stdlib_scnrm2 + + !> SCOPY: copies a vector, x, to a vector, y. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_scopy(n,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sx(*) + real(sp), intent(out) :: sy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,7) + if (m/=0) then + do i = 1,m + sy(i) = sx(i) + end do + if (n<7) return + end if + mp1 = m + 1 + do i = mp1,n,7 + sy(i) = sx(i) + sy(i+1) = sx(i+1) + sy(i+2) = sx(i+2) + sy(i+3) = sx(i+3) + sy(i+4) = sx(i+4) + sy(i+5) = sx(i+5) + sy(i+6) = sx(i+6) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + sy(iy) = sx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_scopy + + !> SDOT: forms the dot product of two vectors. + !> uses unrolled loops for increments equal to one. + + pure real(sp) function stdlib_sdot(n,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sx(*), sy(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: stemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + stemp = zero + stdlib_sdot = zero + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + stemp = stemp + sx(i)*sy(i) + end do + if (n<5) then + stdlib_sdot=stemp + return + end if + end if + mp1 = m + 1 + do i = mp1,n,5 + stemp = stemp + sx(i)*sy(i) + sx(i+1)*sy(i+1) +sx(i+2)*sy(i+2) + sx(i+3)*sy(i+3) + & + sx(i+4)*sy(i+4) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + stemp = stemp + sx(ix)*sy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_sdot = stemp + return + end function stdlib_sdot + + !> Compute the inner product of two vectors with extended + !> precision accumulation. + !> Returns S.P. result with dot product accumulated in D.P. + !> SDSDOT: = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), + !> where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, and LY is + !> defined in a similar way using INCY. + + pure real(sp) function stdlib_sdsdot(n,sb,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sb + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sx(*), sy(*) + ! Local Scalars + real(dp) :: dsdot + integer(ilp) :: i, kx, ky, ns + ! Intrinsic Functions + intrinsic :: real + dsdot = sb + if (n<=0) then + stdlib_sdsdot = dsdot + return + end if + if (incx==incy .and. incx>0) then + ! code for equal and positive increments. + ns = n*incx + do i = 1,ns,incx + dsdot = dsdot + real(sx(i),KIND=sp)*real(sy(i),KIND=sp) + end do + else + ! code for unequal or nonpositive increments. + kx = 1 + ky = 1 + if (incx<0) kx = 1 + (1-n)*incx + if (incy<0) ky = 1 + (1-n)*incy + do i = 1,n + dsdot = dsdot + real(sx(kx),KIND=sp)*real(sy(ky),KIND=sp) + kx = kx + incx + ky = ky + incy + end do + end if + stdlib_sdsdot = dsdot + return + end function stdlib_sdsdot + + !> SGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_sgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('SGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==zero).and. (beta==one))) return + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the band part of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_sgbmv + + !> SGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_sgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: nota, notb + + ! set nota and notb as true if a and b respectively are not + ! transposed and set nrowa and nrowb as the number of rows of a + ! and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.stdlib_lsame(transa,'C')) .and.(.not.stdlib_lsame(transa,& + 'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.stdlib_lsame(transb,'C')) .and.(.not.stdlib_lsame(& + transb,'T'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda SGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_sgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,leny + y(i) = zero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,leny + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = zero + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = zero + ix = kx + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_sgemv + + !> SGER: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_sger(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=zero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_sger + + !> ! + !> + !> SNRM2: returns the euclidean norm of a vector via the function + !> name, so that + !> SNRM2 := sqrt( x'*x ). + + pure function stdlib_snrm2( n, x, incx ) + real(sp) :: stdlib_snrm2 + ! -- reference blas level1 routine (version 3.9.1_sp) -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! march 2021 + ! Constants + integer, parameter :: wp = kind(1._sp) + real(sp), parameter :: maxn = huge(0.0_sp) + ! .. blue's scaling constants .. + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(sp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(sp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin + ! quick return if possible + stdlib_snrm2 = zero + if( n <= 0 ) return + scl = one + sumsq = zero + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(x(ix)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if ( (amed > zero) .or. (amed > maxn) .or. (amed /= amed) ) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range + scl = one + sumsq = amed + end if + stdlib_snrm2 = scl*sqrt( sumsq ) + return + end function stdlib_snrm2 + + !> applies a plane rotation. + + pure subroutine stdlib_srot(n,sx,incx,sy,incy,c,s) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: c, s + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(inout) :: sx(*), sy(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: stemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + stemp = c*sx(i) + s*sy(i) + sy(i) = c*sy(i) - s*sx(i) + sx(i) = stemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + stemp = c*sx(ix) + s*sy(iy) + sy(iy) = c*sy(iy) - s*sx(ix) + sx(ix) = stemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_srot + + !> ! + !> + !> The computation uses the formulas + !> sigma = sgn(a) if |a| > |b| + !> = sgn(b) if |b| >= |a| + !> r = sigma*sqrt( a**2 + b**2 ) + !> c = 1; s = 0 if r = 0 + !> c = a/r; s = b/r if r != 0 + !> The subroutine also computes + !> z = s if |a| > |b|, + !> = 1/c if |b| >= |a| and c != 0 + !> = 1 if c = 0 + !> This allows c and s to be reconstructed from z as follows: + !> If z = 1, set c = 0, s = 1. + !> If |z| < 1, set c = sqrt(1 - z**2) and s = z. + !> If |z| > 1, set c = 1/z and s = sqrt( 1 - c**2). + + pure subroutine stdlib_srotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._sp) + ! Scaling Constants + ! Scalar Arguments + real(sp), intent(inout) :: a, b + real(sp), intent(out) :: c, s + ! Local Scalars + real(sp) :: anorm, bnorm, scl, sigma, r, z + anorm = abs(a) + bnorm = abs(b) + if( bnorm == zero ) then + c = one + s = zero + b = zero + else if( anorm == zero ) then + c = zero + s = one + a = b + b = one + else + scl = min( safmax, max( safmin, anorm, bnorm ) ) + if( anorm > bnorm ) then + sigma = sign(one,a) + else + sigma = sign(one,b) + end if + r = sigma*( scl*sqrt((a/scl)**2 + (b/scl)**2) ) + c = a/r + s = b/r + if( anorm > bnorm ) then + z = s + else if( c /= zero ) then + z = one/c + else + z = one + end if + a = r + b = z + end if + return + end subroutine stdlib_srotg + + !> APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX + !> (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN + !> (SX**T) + !> SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX >= 0, ELSE + !> LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. + !> WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> SFLAG=-1._sp SFLAG=0._sp SFLAG=1._sp SFLAG=-2.E0 + !> (SH11 SH12) (1._sp SH12) (SH11 1._sp) (1._sp 0._sp) + !> H=( ) ( ) ( ) ( ) + !> (SH21 SH22), (SH21 1._sp), (-1._sp SH22), (0._sp 1._sp). + !> SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. + + pure subroutine stdlib_srotm(n,sx,incx,sy,incy,sparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(in) :: sparam(5) + real(sp), intent(inout) :: sx(*), sy(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: sflag, sh11, sh12, sh21, sh22, two, w, z, zero + integer(ilp) :: i, kx, ky, nsteps + ! Data Statements + zero = 0.0_sp + two = 2.0_sp + sflag = sparam(1) + if (n<=0 .or. (sflag+two==zero)) return + if (incx==incy.and.incx>0) then + nsteps = n*incx + if (sflag CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS + !> THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2) SY2)**T. + !> WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. + !> SFLAG=-1._sp SFLAG=0._sp SFLAG=1._sp SFLAG=-2.E0 + !> (SH11 SH12) (1._sp SH12) (SH11 1._sp) (1._sp 0._sp) + !> H=( ) ( ) ( ) ( ) + !> (SH21 SH22), (SH21 1._sp), (-1._sp SH22), (0._sp 1._sp). + !> LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 + !> RESPECTIVELY. (VALUES OF 1._sp, -1._sp, OR 0._sp IMPLIED BY THE + !> VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) + !> THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE + !> INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE + !> OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. + + pure subroutine stdlib_srotmg(sd1,sd2,sx1,sy1,sparam) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(inout) :: sd1, sd2, sx1 + real(sp), intent(in) :: sy1 + ! Array Arguments + real(sp), intent(out) :: sparam(5) + ! ===================================================================== + ! Local Scalars + real(sp) :: gam, gamsq, one, rgamsq, sflag, sh11, sh12, sh21, sh22, sp1, sp2, sq1, sq2,& + stemp, su, two, zero + ! Intrinsic Functions + intrinsic :: abs + ! Data Statements + zero = 0.0_sp + one = 1.0_sp + two = 2.0_sp + gam = 4096.0_sp + gamsq = 1.67772e7_sp + rgamsq = 5.96046e-8_sp + if (sd1abs(sq2)) then + sh21 = -sy1/sx1 + sh12 = sp2/sp1 + su = one - sh12*sh21 + if (su>zero) then + sflag = zero + sd1 = sd1/su + sd2 = sd2/su + sx1 = sx1*su + else + ! this code path if here for safety. we do not expect this + ! condition to ever hold except in edge cases with rounding + ! errors. see doi: 10.1145/355841.355847 + sflag = -one + sh11 = zero + sh12 = zero + sh21 = zero + sh22 = zero + sd1 = zero + sd2 = zero + sx1 = zero + end if + else + if (sq2=gamsq)) + if (sflag==zero) then + sh11 = one + sh22 = one + sflag = -one + else + sh21 = -one + sh12 = one + sflag = -one + end if + if (sd1<=rgamsq) then + sd1 = sd1*gam**2 + sx1 = sx1/gam + sh11 = sh11/gam + sh12 = sh12/gam + else + sd1 = sd1/gam**2 + sx1 = sx1*gam + sh11 = sh11*gam + sh12 = sh12*gam + end if + enddo + end if + if (sd2/=zero) then + do while ( (abs(sd2)<=rgamsq) .or. (abs(sd2)>=gamsq) ) + if (sflag==zero) then + sh11 = one + sh22 = one + sflag = -one + else + sh21 = -one + sh12 = one + sflag = -one + end if + if (abs(sd2)<=rgamsq) then + sd2 = sd2*gam**2 + sh21 = sh21/gam + sh22 = sh22/gam + else + sd2 = sd2/gam**2 + sh21 = sh21*gam + sh22 = sh22*gam + end if + end do + end if + end if + if (sflag SSBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric band matrix, with k super-diagonals. + + pure subroutine stdlib_ssbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('SSBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with one pass through a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + temp1*a(kplus1,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(kplus1,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(1,j) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(1,j) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + a(l+i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_ssbmv + + !> SSCAL: scales a vector by a constant. + !> uses unrolled loops for increment equal to 1. + + pure subroutine stdlib_sscal(n,sa,sx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sa + integer(ilp), intent(in) :: incx, n + ! Array Arguments + real(sp), intent(inout) :: sx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, m, mp1, nincx + ! Intrinsic Functions + intrinsic :: mod + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + ! clean-up loop + m = mod(n,5) + if (m/=0) then + do i = 1,m + sx(i) = sa*sx(i) + end do + if (n<5) return + end if + mp1 = m + 1 + do i = mp1,n,5 + sx(i) = sa*sx(i) + sx(i+1) = sa*sx(i+1) + sx(i+2) = sa*sx(i+2) + sx(i+3) = sa*sx(i+3) + sx(i+4) = sa*sx(i+4) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + sx(i) = sa*sx(i) + end do + end if + return + end subroutine stdlib_sscal + + !> SSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_sspmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(in) :: ap(*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('SSPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==zero).and. (beta==one))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*ap(kk+j-1) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*ap(kk+j-1) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*ap(kk) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*ap(kk) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + ap(k)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_sspmv + + !> SSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_sspr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(inout) :: ap(*) + real(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('SSPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = kx + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = alpha*x(jx) + ix = jx + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_sspr + + !> SSPR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_sspr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(inout) :: ap(*) + real(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('SSPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==zero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with one pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = 1,j + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do k = kk,kk + j - 1 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + k = kk + do i = j,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do k = kk,kk + n - j + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_sspr2 + + !> SSWAP: interchanges two vectors. + !> uses unrolled loops for increments equal to 1. + + pure subroutine stdlib_sswap(n,sx,incx,sy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + real(sp), intent(inout) :: sx(*), sy(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: stemp + integer(ilp) :: i, ix, iy, m, mp1 + ! Intrinsic Functions + intrinsic :: mod + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + ! clean-up loop + m = mod(n,3) + if (m/=0) then + do i = 1,m + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + end do + if (n<3) return + end if + mp1 = m + 1 + do i = mp1,n,3 + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + stemp = sx(i+1) + sx(i+1) = sy(i+1) + sy(i+1) = stemp + stemp = sx(i+2) + sx(i+2) = sy(i+2) + sy(i+2) = stemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + stemp = sx(ix) + sx(ix) = sy(iy) + sy(iy) = stemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_sswap + + !> SSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_ssymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda SSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_ssymv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=one) then + if (incy==1) then + if (beta==zero) then + do i = 1,n + y(i) = zero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==zero) then + do i = 1,n + y(iy) = zero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==zero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + temp1*a(j,j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*a(j,j) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = zero + y(j) = y(j) + temp1*a(j,j) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = zero + y(jy) = y(jy) + temp1*a(j,j) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + a(i,j)*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_ssymv + + !> SSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**T + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_ssyr(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda SSYR2: performs the symmetric rank 2 operation + !> A := alpha*x*y**T + alpha*y*x**T + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n symmetric matrix. + + pure subroutine stdlib_ssyr2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = 1,j + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = kx + iy = ky + do i = 1,j + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=zero) .or. (y(j)/=zero)) then + temp1 = alpha*y(j) + temp2 = alpha*x(j) + do i = j,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + end if + end do + else + do j = 1,n + if ((x(jx)/=zero) .or. (y(jy)/=zero)) then + temp1 = alpha*y(jy) + temp2 = alpha*x(jx) + ix = jx + iy = jy + do i = j,n + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_ssyr2 + + !> SSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_ssyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda SSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_ssyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T')) .and.(& + .not.stdlib_lsame(trans,'C'))) then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda STBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_stbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('STBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_stbmv + + !> STBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_stbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('STBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with one pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=zero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=zero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t)*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_stbsv + + !> STPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_stpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('STPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=zero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk - 1 + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + if (nounit) temp = temp*ap(kk) + k = kk + 1 + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_stpmv + + !> STPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_stpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: nounit + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('STPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with one pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=zero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=zero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_stpsv + + !> STRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ), + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + + pure subroutine stdlib_strmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda STRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_strmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda STRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_strsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, nounit, upper + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda STRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_strsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(qp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(qp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(qp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(qp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> ZAXPY: constant times a vector plus a vector. + + pure subroutine stdlib_waxpy(n,za,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: za + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(qp), intent(in) :: zx(*) + complex(qp), intent(inout) :: zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (stdlib_qcabs1(za)==0.0_qp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + zy(i) = zy(i) + za*zx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + zy(iy) = zy(iy) + za*zx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_waxpy + + !> ZCOPY: copies a vector, x, to a vector, y. + + pure subroutine stdlib_wcopy(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(qp), intent(in) :: zx(*) + complex(qp), intent(out) :: zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + zy(i) = zx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + zy(iy) = zx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_wcopy + + !> ZDOTC: forms the dot product of two complex vectors + !> ZDOTC = X^H * Y + + pure complex(qp) function stdlib_wdotc(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(qp), intent(in) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(qp) :: ztemp + integer(ilp) :: i, ix, iy + ! Intrinsic Functions + intrinsic :: conjg + ztemp = (0.0_qp,0.0_qp) + stdlib_wdotc = (0.0_qp,0.0_qp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = ztemp + conjg(zx(i))*zy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = ztemp + conjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_wdotc = ztemp + return + end function stdlib_wdotc + + !> ZDOTU: forms the dot product of two complex vectors + !> ZDOTU = X^T * Y + + pure complex(qp) function stdlib_wdotu(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(qp), intent(in) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(qp) :: ztemp + integer(ilp) :: i, ix, iy + ztemp = (0.0_qp,0.0_qp) + stdlib_wdotu = (0.0_qp,0.0_qp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = ztemp + zx(i)*zy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = ztemp + zx(ix)*zy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_wdotu = ztemp + return + end function stdlib_wdotu + + !> Applies a plane rotation, where the cos and sin (c and s) are real + !> and the vectors cx and cy are complex. + !> jack dongarra, linpack, 3/11/78. + + pure subroutine stdlib_wdrot( n, zx, incx, zy, incy, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(qp), intent(in) :: c, s + ! Array Arguments + complex(qp), intent(inout) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(qp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 ) then + ! code for both increments equal to 1 + do i = 1, n + ctemp = c*zx( i ) + s*zy( i ) + zy( i ) = c*zy( i ) - s*zx( i ) + zx( i ) = ctemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*zx( ix ) + s*zy( iy ) + zy( iy ) = c*zy( iy ) - s*zx( ix ) + zx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_wdrot + + !> ZDSCAL: scales a vector by a constant. + + pure subroutine stdlib_wdscal(n,da,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: da + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(inout) :: zx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + ! Intrinsic Functions + intrinsic :: cmplx + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + zx(i) = cmplx(da,0.0_qp,KIND=qp)*zx(i) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + zx(i) = cmplx(da,0.0_qp,KIND=qp)*zx(i) + end do + end if + return + end subroutine stdlib_wdscal + + !> ZGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_wgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('ZGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==czero).and. (beta==cone))) return + noconj = stdlib_lsame(trans,'T') + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the band part of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_wgbmv + + !> ZGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T or op( X ) = X**H, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_wgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: conja, conjb, nota, notb + + + ! set nota and notb as true if a and b respectively are not + ! conjugated or transposed, set conja and conjb as true if a and + ! b respectively are to be transposed but not conjugated and set + ! nrowa and nrowb as the number of rows of a and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + conja = stdlib_lsame(transa,'C') + conjb = stdlib_lsame(transb,'C') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.conja) .and.(.not.stdlib_lsame(transa,'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.conjb) .and.(.not.stdlib_lsame(transb,'T'))) & + then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda ZGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_wgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_wgemv + + !> ZGERC: performs the rank 1 operation + !> A := alpha*x*y**H + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_wgerc(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_wgerc + + !> ZGERU: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_wgeru(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_wgeru + + !> ZHBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian band matrix, with k super-diagonals. + + pure subroutine stdlib_whbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: real,conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('ZHBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(kplus1,j),KIND=qp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(kplus1,j),KIND=qp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(1,j),KIND=qp) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(1,j),KIND=qp) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_whbmv + + !> ZHEMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is an hermitian matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_whemm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZHEMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix. + + pure subroutine stdlib_whemv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(j,j),KIND=qp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(j,j),KIND=qp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(j,j),KIND=qp) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(j,j),KIND=qp) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_whemv + + !> ZHER: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix. + + pure subroutine stdlib_wher(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda ZHER2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n hermitian matrix. + + pure subroutine stdlib_wher2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + do i = 1,j - 1 + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + a(j,j) = real(a(j,j),KIND=qp) +real(x(j)*temp1+y(j)*temp2,KIND=qp) + + else + a(j,j) = real(a(j,j),KIND=qp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do i = 1,j - 1 + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + a(j,j) = real(a(j,j),KIND=qp) +real(x(jx)*temp1+y(jy)*temp2,KIND=qp) + + else + a(j,j) = real(a(j,j),KIND=qp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + a(j,j) = real(a(j,j),KIND=qp) +real(x(j)*temp1+y(j)*temp2,KIND=qp) + + do i = j + 1,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=qp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + a(j,j) = real(a(j,j),KIND=qp) +real(x(jx)*temp1+y(jy)*temp2,KIND=qp) + + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=qp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_wher2 + + !> ZHER2K: performs one of the hermitian rank 2k operations + !> C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + !> or + !> C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + !> where alpha and beta are scalars with beta real, C is an n by n + !> hermitian matrix and A and B are n by k matrices in the first case + !> and k by n matrices in the second case. + + pure subroutine stdlib_wher2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + real(qp), intent(in) :: beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZHERK: performs one of the hermitian rank k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n by n hermitian + !> matrix and A is an n by k matrix in the first case and a k by n + !> matrix in the second case. + + pure subroutine stdlib_wherk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,max + ! Local Scalars + complex(qp) :: temp + real(qp) :: rtemp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZHPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_whpmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: ap(*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZHPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*real(ap(kk+j-1),KIND=qp) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(ap(kk+j-1),KIND=qp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(ap(kk),KIND=qp) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(ap(kk),KIND=qp) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_whpmv + + !> ZHPR: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_whpr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('ZHPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==real(czero,KIND=qp))) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + real(x(j)*temp,KIND=qp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ix = kx + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + real(x(jx)*temp,KIND=qp) + + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + ap(kk) = real(ap(kk),KIND=qp) + real(temp*x(j),KIND=qp) + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=qp) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ap(kk) = real(ap(kk),KIND=qp) + real(temp*x(jx),KIND=qp) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + ap(k) = ap(k) + x(ix)*temp + end do + else + ap(kk) = real(ap(kk),KIND=qp) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_whpr + + !> ZHPR2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_whpr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZHPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==czero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) +real(x(j)*temp1+y(j)*temp2,& + KIND=qp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) +real(x(jx)*temp1+y(jy)*temp2,& + KIND=qp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=qp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + ap(kk) = real(ap(kk),KIND=qp) +real(x(j)*temp1+y(j)*temp2,KIND=qp) + + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=qp) + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ap(kk) = real(ap(kk),KIND=qp) +real(x(jx)*temp1+y(jy)*temp2,KIND=qp) + + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + end do + else + ap(kk) = real(ap(kk),KIND=qp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_whpr2 + + !> ! + !> + !> The computation uses the formulas + !> |x| = sqrt( Re(x)**2 + Im(x)**2 ) + !> sgn(x) = x / |x| if x /= 0 + !> = 1 if x = 0 + !> c = |a| / sqrt(|a|**2 + |b|**2) + !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) + !> When a and b are real and r /= 0, the formulas simplify to + !> r = sgn(a)*sqrt(|a|**2 + |b|**2) + !> c = a / r + !> s = b / r + !> the same as in DROTG when |a| > |b|. When |b| >= |a|, the + !> sign of c and s will be different from those computed by DROTG + !> if the signs of a and b are not the same. + + pure subroutine stdlib_wrotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._qp) + ! Scaling Constants + ! Scalar Arguments + real(qp), intent(out) :: c + complex(qp), intent(inout) :: a + complex(qp), intent(in) :: b + complex(qp), intent(out) :: s + ! Local Scalars + real(qp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(qp) :: f, fs, g, gs, r, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(qp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=qp)**2 + aimag( t )**2 + ! Executable Statements + f = a + g = b + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=qp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=qp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=qp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + a = r + return + end subroutine stdlib_wrotg + + !> ZSCAL: scales a vector by a constant. + + pure subroutine stdlib_wscal(n,za,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: za + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(inout) :: zx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + zx(i) = za*zx(i) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + zx(i) = za*zx(i) + end do + end if + return + end subroutine stdlib_wscal + + !> ZSWAP: interchanges two vectors. + + pure subroutine stdlib_wswap(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(qp), intent(inout) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(qp) :: ztemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = zx(i) + zx(i) = zy(i) + zy(i) = ztemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = zx(ix) + zx(ix) = zy(iy) + zy(iy) = ztemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_wswap + + !> ZSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_wsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_wsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(qp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_wsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZTBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_wtbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZTBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + end if + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + end if + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_wtbmv + + !> ZTBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_wtbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZTBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=czero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=czero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_wtbsv + + !> ZTPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_wtpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZTPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk - 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j - 1,1,-1 + temp = temp + conjg(ap(k))*x(i) + k = k - 1 + end do + end if + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j + 1,n + temp = temp + conjg(ap(k))*x(i) + k = k + 1 + end do + end if + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_wtpmv + + !> ZTPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_wtpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZTPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + if (noconj) then + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do i = 1,j - 1 + temp = temp - conjg(ap(k))*x(i) + k = k + 1 + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do k = kk,kk + j - 2 + temp = temp - conjg(ap(k))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + if (noconj) then + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do i = n,j + 1,-1 + temp = temp - conjg(ap(k))*x(i) + k = k - 1 + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do k = kk,kk - (n- (j+1)),-1 + temp = temp - conjg(ap(k))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_wtpsv + + !> ZTRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ) + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + + pure subroutine stdlib_wtrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda ZTRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_wtrmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZTRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_wtrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda ZTRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_wtrsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(dp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(dp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(dp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(dp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> ZAXPY: constant times a vector plus a vector. + + pure subroutine stdlib_zaxpy(n,za,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: za + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(dp), intent(in) :: zx(*) + complex(dp), intent(inout) :: zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (stdlib_dcabs1(za)==0.0_dp) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + zy(i) = zy(i) + za*zx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + zy(iy) = zy(iy) + za*zx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_zaxpy + + !> ZCOPY: copies a vector, x, to a vector, y. + + pure subroutine stdlib_zcopy(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(dp), intent(in) :: zx(*) + complex(dp), intent(out) :: zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + zy(i) = zx(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + zy(iy) = zx(ix) + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_zcopy + + !> ZDOTC: forms the dot product of two complex vectors + !> ZDOTC = X^H * Y + + pure complex(dp) function stdlib_zdotc(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(dp), intent(in) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(dp) :: ztemp + integer(ilp) :: i, ix, iy + ! Intrinsic Functions + intrinsic :: conjg + ztemp = (0.0_dp,0.0_dp) + stdlib_zdotc = (0.0_dp,0.0_dp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = ztemp + conjg(zx(i))*zy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = ztemp + conjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_zdotc = ztemp + return + end function stdlib_zdotc + + !> ZDOTU: forms the dot product of two complex vectors + !> ZDOTU = X^T * Y + + pure complex(dp) function stdlib_zdotu(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(dp), intent(in) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(dp) :: ztemp + integer(ilp) :: i, ix, iy + ztemp = (0.0_dp,0.0_dp) + stdlib_zdotu = (0.0_dp,0.0_dp) + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = ztemp + zx(i)*zy(i) + end do + else + ! code for unequal increments or equal increments + ! not equal to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = ztemp + zx(ix)*zy(iy) + ix = ix + incx + iy = iy + incy + end do + end if + stdlib_zdotu = ztemp + return + end function stdlib_zdotu + + !> Applies a plane rotation, where the cos and sin (c and s) are real + !> and the vectors cx and cy are complex. + !> jack dongarra, linpack, 3/11/78. + + pure subroutine stdlib_zdrot( n, zx, incx, zy, incy, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(dp), intent(in) :: c, s + ! Array Arguments + complex(dp), intent(inout) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(dp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 ) then + ! code for both increments equal to 1 + do i = 1, n + ctemp = c*zx( i ) + s*zy( i ) + zy( i ) = c*zy( i ) - s*zx( i ) + zx( i ) = ctemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*zx( ix ) + s*zy( iy ) + zy( iy ) = c*zy( iy ) - s*zx( ix ) + zx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_zdrot + + !> ZDSCAL: scales a vector by a constant. + + pure subroutine stdlib_zdscal(n,da,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: da + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(inout) :: zx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + ! Intrinsic Functions + intrinsic :: cmplx + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + zx(i) = cmplx(da,0.0_dp,KIND=dp)*zx(i) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + zx(i) = cmplx(da,0.0_dp,KIND=dp)*zx(i) + end do + end if + return + end subroutine stdlib_zdscal + + !> ZGBMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + pure subroutine stdlib_zgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, kl, ku, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kup1, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (kl<0) then + info = 4 + else if (ku<0) then + info = 5 + else if (lda< (kl+ku+1)) then + info = 8 + else if (incx==0) then + info = 10 + else if (incy==0) then + info = 13 + end if + if (info/=0) then + call stdlib_xerbla('ZGBMV ',info) + return + end if + ! quick return if possible. + if ((m==0) .or. (n==0) .or.((alpha==czero).and. (beta==cone))) return + noconj = stdlib_lsame(trans,'T') + ! set lenx and leny, the lengths of the vectors x and y, and set + ! up the start points in x and y. + if (stdlib_lsame(trans,'N')) then + lenx = n + leny = m + else + lenx = m + leny = n + end if + if (incx>0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the band part of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kup1 = ku + 1 + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(i) = y(i) + temp*a(k+i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + k = kup1 - j + do i = max(1,j-ku),min(m,j+kl) + y(iy) = y(iy) + temp*a(k+i,j) + iy = iy + incy + end do + jx = jx + incx + if (j>ku) ky = ky + incy + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(i) + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + k = kup1 - j + if (noconj) then + do i = max(1,j-ku),min(m,j+kl) + temp = temp + a(k+i,j)*x(ix) + ix = ix + incx + end do + else + do i = max(1,j-ku),min(m,j+kl) + temp = temp + conjg(a(k+i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + if (j>ku) kx = kx + incx + end do + end if + end if + return + end subroutine stdlib_zgbmv + + !> ZGEMM: performs one of the matrix-matrix operations + !> C := alpha*op( A )*op( B ) + beta*C, + !> where op( X ) is one of + !> op( X ) = X or op( X ) = X**T or op( X ) = X**H, + !> alpha and beta are scalars, and A, B and C are matrices, with op( A ) + !> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + pure subroutine stdlib_zgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, m, n + character, intent(in) :: transa, transb + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, j, l, nrowa, nrowb + logical(lk) :: conja, conjb, nota, notb + + + ! set nota and notb as true if a and b respectively are not + ! conjugated or transposed, set conja and conjb as true if a and + ! b respectively are to be transposed but not conjugated and set + ! nrowa and nrowb as the number of rows of a and b respectively. + nota = stdlib_lsame(transa,'N') + notb = stdlib_lsame(transb,'N') + conja = stdlib_lsame(transa,'C') + conjb = stdlib_lsame(transb,'C') + if (nota) then + nrowa = m + else + nrowa = k + end if + if (notb) then + nrowb = k + else + nrowb = n + end if + ! test the input parameters. + info = 0 + if ((.not.nota) .and. (.not.conja) .and.(.not.stdlib_lsame(transa,'T'))) then + info = 1 + else if ((.not.notb) .and. (.not.conjb) .and.(.not.stdlib_lsame(transb,'T'))) & + then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda ZGEMV: performs one of the matrix-vector operations + !> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + !> y := alpha*A**H*x + beta*y, + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + + pure subroutine stdlib_zgemv(trans,m,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + character, intent(in) :: trans + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky, lenx, leny + logical(lk) :: noconj + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 1 + else if (m<0) then + info = 2 + else if (n<0) then + info = 3 + else if (lda0) then + kx = 1 + else + kx = 1 - (lenx-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (leny-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,leny + y(i) = czero + end do + else + do i = 1,leny + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,leny + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,leny + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(trans,'N')) then + ! form y := alpha*a*x + y. + jx = kx + if (incy==1) then + do j = 1,n + temp = alpha*x(jx) + do i = 1,m + y(i) = y(i) + temp*a(i,j) + end do + jx = jx + incx + end do + else + do j = 1,n + temp = alpha*x(jx) + iy = ky + do i = 1,m + y(iy) = y(iy) + temp*a(i,j) + iy = iy + incy + end do + jx = jx + incx + end do + end if + else + ! form y := alpha*a**t*x + y or y := alpha*a**h*x + y. + jy = ky + if (incx==1) then + do j = 1,n + temp = czero + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(i) + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(i) + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + else + do j = 1,n + temp = czero + ix = kx + if (noconj) then + do i = 1,m + temp = temp + a(i,j)*x(ix) + ix = ix + incx + end do + else + do i = 1,m + temp = temp + conjg(a(i,j))*x(ix) + ix = ix + incx + end do + end if + y(jy) = y(jy) + alpha*temp + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_zgemv + + !> ZGERC: performs the rank 1 operation + !> A := alpha*x*y**H + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_zgerc(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*conjg(y(jy)) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_zgerc + + !> ZGERU: performs the rank 1 operation + !> A := alpha*x*y**T + A, + !> where alpha is a scalar, x is an m element vector, y is an n element + !> vector and A is an m by n matrix. + + pure subroutine stdlib_zgeru(m,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jy, kx + ! Intrinsic Functions + intrinsic :: max + ! test the input parameters. + info = 0 + if (m<0) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + jy = 1 + else + jy = 1 - (n-1)*incy + end if + if (incx==1) then + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + do i = 1,m + a(i,j) = a(i,j) + x(i)*temp + end do + end if + jy = jy + incy + end do + else + if (incx>0) then + kx = 1 + else + kx = 1 - (m-1)*incx + end if + do j = 1,n + if (y(jy)/=czero) then + temp = alpha*y(jy) + ix = kx + do i = 1,m + a(i,j) = a(i,j) + x(ix)*temp + ix = ix + incx + end do + end if + jy = jy + incy + end do + end if + return + end subroutine stdlib_zgeru + + !> ZHBMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian band matrix, with k super-diagonals. + + pure subroutine stdlib_zhbmv(uplo,n,k,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, k, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kplus1, kx, ky, l + ! Intrinsic Functions + intrinsic :: real,conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (k<0) then + info = 3 + else if (lda< (k+1)) then + info = 6 + else if (incx==0) then + info = 8 + else if (incy==0) then + info = 11 + end if + if (info/=0) then + call stdlib_xerbla('ZHBMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array a + ! are accessed sequentially with cone pass through a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when upper triangle of a is stored. + kplus1 = k + 1 + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(kplus1,j),KIND=dp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + l = kplus1 - j + do i = max(1,j-k),j - 1 + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(kplus1,j),KIND=dp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + if (j>k) then + kx = kx + incx + ky = ky + incy + end if + end do + end if + else + ! form y when lower triangle of a is stored. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(1,j),KIND=dp) + l = 1 - j + do i = j + 1,min(n,j+k) + y(i) = y(i) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(1,j),KIND=dp) + l = 1 - j + ix = jx + iy = jy + do i = j + 1,min(n,j+k) + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(l+i,j) + temp2 = temp2 + conjg(a(l+i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_zhbmv + + !> ZHEMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is an hermitian matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_zhemm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZHEMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix. + + pure subroutine stdlib_zhemv(uplo,n,alpha,a,lda,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + if (stdlib_lsame(uplo,'U')) then + ! form y when a is stored in upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + do i = 1,j - 1 + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + temp1*real(a(j,j),KIND=dp) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do i = 1,j - 1 + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(a(j,j),KIND=dp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(a(j,j),KIND=dp) + do i = j + 1,n + y(i) = y(i) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(i) + end do + y(j) = y(j) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(a(j,j),KIND=dp) + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*a(i,j) + temp2 = temp2 + conjg(a(i,j))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_zhemv + + !> ZHER: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix. + + pure subroutine stdlib_zher(uplo,n,alpha,x,incx,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (lda ZHER2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an n + !> by n hermitian matrix. + + pure subroutine stdlib_zher2(uplo,n,alpha,x,incx,y,incy,a,lda) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + else if (lda0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + if (stdlib_lsame(uplo,'U')) then + ! form a when a is stored in the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + do i = 1,j - 1 + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + a(j,j) = real(a(j,j),KIND=dp) +real(x(j)*temp1+y(j)*temp2,KIND=dp) + + else + a(j,j) = real(a(j,j),KIND=dp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do i = 1,j - 1 + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + a(j,j) = real(a(j,j),KIND=dp) +real(x(jx)*temp1+y(jy)*temp2,KIND=dp) + + else + a(j,j) = real(a(j,j),KIND=dp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form a when a is stored in the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + a(j,j) = real(a(j,j),KIND=dp) +real(x(j)*temp1+y(j)*temp2,KIND=dp) + + do i = j + 1,n + a(i,j) = a(i,j) + x(i)*temp1 + y(i)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=dp) + end if + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + a(j,j) = real(a(j,j),KIND=dp) +real(x(jx)*temp1+y(jy)*temp2,KIND=dp) + + ix = jx + iy = jy + do i = j + 1,n + ix = ix + incx + iy = iy + incy + a(i,j) = a(i,j) + x(ix)*temp1 + y(iy)*temp2 + end do + else + a(j,j) = real(a(j,j),KIND=dp) + end if + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_zher2 + + !> ZHER2K: performs one of the hermitian rank 2k operations + !> C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + !> or + !> C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + !> where alpha and beta are scalars with beta real, C is an n by n + !> hermitian matrix and A and B are n by k matrices in the first case + !> and k by n matrices in the second case. + + pure subroutine stdlib_zher2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + real(dp), intent(in) :: beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZHERK: performs one of the hermitian rank k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n by n hermitian + !> matrix and A is an n by k matrix in the first case and a k by n + !> matrix in the second case. + + pure subroutine stdlib_zherk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,max + ! Local Scalars + complex(dp) :: temp + real(dp) :: rtemp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'C'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZHPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_zhpmv(uplo,n,alpha,ap,x,incx,beta,y,incy) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: ap(*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 6 + else if (incy==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZHPMV ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. ((alpha==czero).and. (beta==cone))) return + ! set up the start points in x and y. + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if (beta/=cone) then + if (incy==1) then + if (beta==czero) then + do i = 1,n + y(i) = czero + end do + else + do i = 1,n + y(i) = beta*y(i) + end do + end if + else + iy = ky + if (beta==czero) then + do i = 1,n + y(iy) = czero + iy = iy + incy + end do + else + do i = 1,n + y(iy) = beta*y(iy) + iy = iy + incy + end do + end if + end if + end if + if (alpha==czero) return + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form y when ap contains the upper triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + k = kk + do i = 1,j - 1 + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + temp1*real(ap(kk+j-1),KIND=dp) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + ix = kx + iy = ky + do k = kk,kk + j - 2 + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + ix = ix + incx + iy = iy + incy + end do + y(jy) = y(jy) + temp1*real(ap(kk+j-1),KIND=dp) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + temp1 = alpha*x(j) + temp2 = czero + y(j) = y(j) + temp1*real(ap(kk),KIND=dp) + k = kk + 1 + do i = j + 1,n + y(i) = y(i) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(i) + k = k + 1 + end do + y(j) = y(j) + alpha*temp2 + kk = kk + (n-j+1) + end do + else + jx = kx + jy = ky + do j = 1,n + temp1 = alpha*x(jx) + temp2 = czero + y(jy) = y(jy) + temp1*real(ap(kk),KIND=dp) + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + y(iy) = y(iy) + temp1*ap(k) + temp2 = temp2 + conjg(ap(k))*x(ix) + end do + y(jy) = y(jy) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + (n-j+1) + end do + end if + end if + return + end subroutine stdlib_zhpmv + + !> ZHPR: performs the hermitian rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a real scalar, x is an n element vector and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_zhpr(uplo,n,alpha,x,incx,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + end if + if (info/=0) then + call stdlib_xerbla('ZHPR ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==real(czero,KIND=dp))) return + ! set the start point in x if the increment is not unity. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + real(x(j)*temp,KIND=dp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ix = kx + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp + ix = ix + incx + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + real(x(jx)*temp,KIND=dp) + + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = alpha*conjg(x(j)) + ap(kk) = real(ap(kk),KIND=dp) + real(temp*x(j),KIND=dp) + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=dp) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = alpha*conjg(x(jx)) + ap(kk) = real(ap(kk),KIND=dp) + real(temp*x(jx),KIND=dp) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + ap(k) = ap(k) + x(ix)*temp + end do + else + ap(kk) = real(ap(kk),KIND=dp) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_zhpr + + !> ZHPR2: performs the hermitian rank 2 operation + !> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + !> where alpha is a scalar, x and y are n element vectors and A is an + !> n by n hermitian matrix, supplied in packed form. + + pure subroutine stdlib_zhpr2(uplo,n,alpha,x,incx,y,incy,ap) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: incx, incy, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + ! Intrinsic Functions + intrinsic :: real,conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (n<0) then + info = 2 + else if (incx==0) then + info = 5 + else if (incy==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZHPR2 ',info) + return + end if + ! quick return if possible. + if ((n==0) .or. (alpha==czero)) return + ! set up the start points in x and y if the increments are not both + ! unity. + if ((incx/=1) .or. (incy/=1)) then + if (incx>0) then + kx = 1 + else + kx = 1 - (n-1)*incx + end if + if (incy>0) then + ky = 1 + else + ky = 1 - (n-1)*incy + end if + jx = kx + jy = ky + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if (stdlib_lsame(uplo,'U')) then + ! form a when upper triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + k = kk + do i = 1,j - 1 + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) +real(x(j)*temp1+y(j)*temp2,& + KIND=dp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + end if + kk = kk + j + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ix = kx + iy = ky + do k = kk,kk + j - 2 + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + ix = ix + incx + iy = iy + incy + end do + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) +real(x(jx)*temp1+y(jy)*temp2,& + KIND=dp) + else + ap(kk+j-1) = real(ap(kk+j-1),KIND=dp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if ((incx==1) .and. (incy==1)) then + do j = 1,n + if ((x(j)/=czero) .or. (y(j)/=czero)) then + temp1 = alpha*conjg(y(j)) + temp2 = conjg(alpha*x(j)) + ap(kk) = real(ap(kk),KIND=dp) +real(x(j)*temp1+y(j)*temp2,KIND=dp) + + k = kk + 1 + do i = j + 1,n + ap(k) = ap(k) + x(i)*temp1 + y(i)*temp2 + k = k + 1 + end do + else + ap(kk) = real(ap(kk),KIND=dp) + end if + kk = kk + n - j + 1 + end do + else + do j = 1,n + if ((x(jx)/=czero) .or. (y(jy)/=czero)) then + temp1 = alpha*conjg(y(jy)) + temp2 = conjg(alpha*x(jx)) + ap(kk) = real(ap(kk),KIND=dp) +real(x(jx)*temp1+y(jy)*temp2,KIND=dp) + + ix = jx + iy = jy + do k = kk + 1,kk + n - j + ix = ix + incx + iy = iy + incy + ap(k) = ap(k) + x(ix)*temp1 + y(iy)*temp2 + end do + else + ap(kk) = real(ap(kk),KIND=dp) + end if + jx = jx + incx + jy = jy + incy + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_zhpr2 + + !> ! + !> + !> The computation uses the formulas + !> |x| = sqrt( Re(x)**2 + Im(x)**2 ) + !> sgn(x) = x / |x| if x /= 0 + !> = 1 if x = 0 + !> c = |a| / sqrt(|a|**2 + |b|**2) + !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) + !> When a and b are real and r /= 0, the formulas simplify to + !> r = sgn(a)*sqrt(|a|**2 + |b|**2) + !> c = a / r + !> s = b / r + !> the same as in DROTG when |a| > |b|. When |b| >= |a|, the + !> sign of c and s will be different from those computed by DROTG + !> if the signs of a and b are not the same. + + pure subroutine stdlib_zrotg( a, b, c, s ) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Constants + integer, parameter :: wp = kind(1._dp) + ! Scaling Constants + ! Scalar Arguments + real(dp), intent(out) :: c + complex(dp), intent(inout) :: a + complex(dp), intent(in) :: b + complex(dp), intent(out) :: s + ! Local Scalars + real(dp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(dp) :: f, fs, g, gs, r, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(dp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=dp)**2 + aimag( t )**2 + ! Executable Statements + f = a + g = b + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=dp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=dp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=dp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + a = r + return + end subroutine stdlib_zrotg + + !> ZSCAL: scales a vector by a constant. + + pure subroutine stdlib_zscal(n,za,zx,incx) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: za + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(inout) :: zx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + if (n<=0 .or. incx<=0) return + if (incx==1) then + ! code for increment equal to 1 + do i = 1,n + zx(i) = za*zx(i) + end do + else + ! code for increment not equal to 1 + nincx = n*incx + do i = 1,nincx,incx + zx(i) = za*zx(i) + end do + end if + return + end subroutine stdlib_zscal + + !> ZSWAP: interchanges two vectors. + + pure subroutine stdlib_zswap(n,zx,incx,zy,incy) + ! -- reference blas level1 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + ! Array Arguments + complex(dp), intent(inout) :: zx(*), zy(*) + ! ===================================================================== + ! Local Scalars + complex(dp) :: ztemp + integer(ilp) :: i, ix, iy + if (n<=0) return + if (incx==1 .and. incy==1) then + ! code for both increments equal to 1 + do i = 1,n + ztemp = zx(i) + zx(i) = zy(i) + zy(i) = ztemp + end do + else + ! code for unequal increments or equal increments not equal + ! to 1 + ix = 1 + iy = 1 + if (incx<0) ix = (-n+1)*incx + 1 + if (incy<0) iy = (-n+1)*incy + 1 + do i = 1,n + ztemp = zx(ix) + zx(ix) = zy(iy) + zy(iy) = ztemp + ix = ix + incx + iy = iy + incy + end do + end if + return + end subroutine stdlib_zswap + + !> ZSYMM: performs one of the matrix-matrix operations + !> C := alpha*A*B + beta*C, + !> or + !> C := alpha*B*A + beta*C, + !> where alpha and beta are scalars, A is a symmetric matrix and B and + !> C are m by n matrices. + + pure subroutine stdlib_zsymm(side,uplo,m,n,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + character, intent(in) :: side, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: upper + + + ! set nrowa as the number of rows of a. + if (stdlib_lsame(side,'L')) then + nrowa = m + else + nrowa = n + end if + upper = stdlib_lsame(uplo,'U') + ! test the input parameters. + info = 0 + if ((.not.stdlib_lsame(side,'L')) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if (m<0) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZSYR2K: performs one of the symmetric rank 2k operations + !> C := alpha*A*B**T + alpha*B*A**T + beta*C, + !> or + !> C := alpha*A**T*B + alpha*B**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A and B are n by k matrices in the first case and k by n + !> matrices in the second case. + + pure subroutine stdlib_zsyr2k(uplo,trans,n,k,alpha,a,lda,b,ldb,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldb, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(dp) :: temp1, temp2 + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZSYRK: performs one of the symmetric rank k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are scalars, C is an n by n symmetric matrix + !> and A is an n by k matrix in the first case and a k by n matrix + !> in the second case. + + pure subroutine stdlib_zsyrk(uplo,trans,n,k,alpha,a,lda,beta,c,ldc) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, ldc, n + character, intent(in) :: trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, j, l, nrowa + logical(lk) :: upper + + + ! test the input parameters. + if (stdlib_lsame(trans,'N')) then + nrowa = n + else + nrowa = k + end if + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 1 + else if ((.not.stdlib_lsame(trans,'N')) .and.(.not.stdlib_lsame(trans,'T'))) & + then + info = 2 + else if (n<0) then + info = 3 + else if (k<0) then + info = 4 + else if (lda ZTBMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + pure subroutine stdlib_ztbmv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZTBMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := a*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(kplus1,j) + end if + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = kplus1 - j + do i = max(1,j-k),j - 1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*a(kplus1,j) + end if + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(i) = x(i) + temp*a(l+i,j) + end do + if (nounit) x(j) = x(j)*a(1,j) + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + l = 1 - j + do i = min(n,j+k),j + 1,-1 + x(ix) = x(ix) + temp*a(l+i,j) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*a(1,j) + end if + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + kx = kx - incx + ix = kx + l = kplus1 - j + if (noconj) then + if (nounit) temp = temp*a(kplus1,j) + do i = j - 1,max(1,j-k),-1 + temp = temp + a(l+i,j)*x(ix) + ix = ix - incx + end do + else + if (nounit) temp = temp*conjg(a(kplus1,j)) + do i = j - 1,max(1,j-k),-1 + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + end if + x(jx) = temp + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + temp = x(j) + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(i) + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(i) + end do + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + kx = kx + incx + ix = kx + l = 1 - j + if (noconj) then + if (nounit) temp = temp*a(1,j) + do i = j + 1,min(n,j+k) + temp = temp + a(l+i,j)*x(ix) + ix = ix + incx + end do + else + if (nounit) temp = temp*conjg(a(1,j)) + do i = j + 1,min(n,j+k) + temp = temp + conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + end if + x(jx) = temp + jx = jx + incx + end do + end if + end if + end if + return + end subroutine stdlib_ztbmv + + !> ZTBSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular band matrix, with ( k + 1 ) + !> diagonals. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ztbsv(uplo,trans,diag,n,k,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kplus1, kx, l + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (k<0) then + info = 5 + else if (lda< (k+1)) then + info = 7 + else if (incx==0) then + info = 9 + end if + if (info/=0) then + call stdlib_xerbla('ZTBSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of a are + ! accessed by sequentially with cone pass through a. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + l = kplus1 - j + if (nounit) x(j) = x(j)/a(kplus1,j) + temp = x(j) + do i = j - 1,max(1,j-k),-1 + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + kx = kx - incx + if (x(jx)/=czero) then + ix = kx + l = kplus1 - j + if (nounit) x(jx) = x(jx)/a(kplus1,j) + temp = x(jx) + do i = j - 1,max(1,j-k),-1 + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix - incx + end do + end if + jx = jx - incx + end do + end if + else + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + l = 1 - j + if (nounit) x(j) = x(j)/a(1,j) + temp = x(j) + do i = j + 1,min(n,j+k) + x(i) = x(i) - temp*a(l+i,j) + end do + end if + end do + else + jx = kx + do j = 1,n + kx = kx + incx + if (x(jx)/=czero) then + ix = kx + l = 1 - j + if (nounit) x(jx) = x(jx)/a(1,j) + temp = x(jx) + do i = j + 1,min(n,j+k) + x(ix) = x(ix) - temp*a(l+i,j) + ix = ix + incx + end do + end if + jx = jx + incx + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kplus1 = k + 1 + if (incx==1) then + do j = 1,n + temp = x(j) + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(j) = temp + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + l = kplus1 - j + if (noconj) then + do i = max(1,j-k),j - 1 + temp = temp - a(l+i,j)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/a(kplus1,j) + else + do i = max(1,j-k),j - 1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(a(kplus1,j)) + end if + x(jx) = temp + jx = jx + incx + if (j>k) kx = kx + incx + end do + end if + else + if (incx==1) then + do j = n,1,-1 + temp = x(j) + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(i) + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(i) + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(j) = temp + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + l = 1 - j + if (noconj) then + do i = min(n,j+k),j + 1,-1 + temp = temp - a(l+i,j)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/a(1,j) + else + do i = min(n,j+k),j + 1,-1 + temp = temp - conjg(a(l+i,j))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(a(1,j)) + end if + x(jx) = temp + jx = jx - incx + if ((n-j)>=k) kx = kx - incx + end do + end if + end if + end if + return + end subroutine stdlib_ztbsv + + !> ZTPMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix, supplied in packed form. + + pure subroutine stdlib_ztpmv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZTPMV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x:= a*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = 1,j - 1 + x(i) = x(i) + temp*ap(k) + k = k + 1 + end do + if (nounit) x(j) = x(j)*ap(kk+j-1) + end if + kk = kk + j + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk + j - 2 + x(ix) = x(ix) + temp*ap(k) + ix = ix + incx + end do + if (nounit) x(jx) = x(jx)*ap(kk+j-1) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + temp = x(j) + k = kk + do i = n,j + 1,-1 + x(i) = x(i) + temp*ap(k) + k = k - 1 + end do + if (nounit) x(j) = x(j)*ap(kk-n+j) + end if + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + if (x(jx)/=czero) then + temp = x(jx) + ix = kx + do k = kk,kk - (n- (j+1)),-1 + x(ix) = x(ix) + temp*ap(k) + ix = ix - incx + end do + if (nounit) x(jx) = x(jx)*ap(kk-n+j) + end if + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + else + ! form x := a**t*x or x := a**h*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk - 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j - 1,1,-1 + temp = temp + ap(k)*x(i) + k = k - 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j - 1,1,-1 + temp = temp + conjg(ap(k))*x(i) + k = k - 1 + end do + end if + x(j) = temp + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + 1 + if (noconj) then + if (nounit) temp = temp*ap(kk) + do i = j + 1,n + temp = temp + ap(k)*x(i) + k = k + 1 + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do i = j + 1,n + temp = temp + conjg(ap(k))*x(i) + k = k + 1 + end do + end if + x(j) = temp + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = jx + if (noconj) then + if (nounit) temp = temp*ap(kk) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + ap(k)*x(ix) + end do + else + if (nounit) temp = temp*conjg(ap(kk)) + do k = kk + 1,kk + n - j + ix = ix + incx + temp = temp + conjg(ap(k))*x(ix) + end do + end if + x(jx) = temp + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_ztpmv + + !> ZTPSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix, supplied in packed form. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ztpsv(uplo,trans,diag,n,ap,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (incx==0) then + info = 7 + end if + if (info/=0) then + call stdlib_xerbla('ZTPSV ',info) + return + end if + ! quick return if possible. + if (n==0) return + noconj = stdlib_lsame(trans,'T') + nounit = stdlib_lsame(diag,'N') + ! set up the start point in x if the increment is not unity. this + ! will be ( n - 1 )*incx too small for descending loops. + if (incx<=0) then + kx = 1 - (n-1)*incx + else if (incx/=1) then + kx = 1 + end if + ! start the operations. in this version the elements of ap are + ! accessed sequentially with cone pass through ap. + if (stdlib_lsame(trans,'N')) then + ! form x := inv( a )*x. + if (stdlib_lsame(uplo,'U')) then + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk - 1 + do i = j - 1,1,-1 + x(i) = x(i) - temp*ap(k) + k = k - 1 + end do + end if + kk = kk - j + end do + else + jx = kx + (n-1)*incx + do j = n,1,-1 + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk - 1,kk - j + 1,-1 + ix = ix - incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx - incx + kk = kk - j + end do + end if + else + kk = 1 + if (incx==1) then + do j = 1,n + if (x(j)/=czero) then + if (nounit) x(j) = x(j)/ap(kk) + temp = x(j) + k = kk + 1 + do i = j + 1,n + x(i) = x(i) - temp*ap(k) + k = k + 1 + end do + end if + kk = kk + (n-j+1) + end do + else + jx = kx + do j = 1,n + if (x(jx)/=czero) then + if (nounit) x(jx) = x(jx)/ap(kk) + temp = x(jx) + ix = jx + do k = kk + 1,kk + n - j + ix = ix + incx + x(ix) = x(ix) - temp*ap(k) + end do + end if + jx = jx + incx + kk = kk + (n-j+1) + end do + end if + end if + else + ! form x := inv( a**t )*x or x := inv( a**h )*x. + if (stdlib_lsame(uplo,'U')) then + kk = 1 + if (incx==1) then + do j = 1,n + temp = x(j) + k = kk + if (noconj) then + do i = 1,j - 1 + temp = temp - ap(k)*x(i) + k = k + 1 + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do i = 1,j - 1 + temp = temp - conjg(ap(k))*x(i) + k = k + 1 + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(j) = temp + kk = kk + j + end do + else + jx = kx + do j = 1,n + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk + j - 2 + temp = temp - ap(k)*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/ap(kk+j-1) + else + do k = kk,kk + j - 2 + temp = temp - conjg(ap(k))*x(ix) + ix = ix + incx + end do + if (nounit) temp = temp/conjg(ap(kk+j-1)) + end if + x(jx) = temp + jx = jx + incx + kk = kk + j + end do + end if + else + kk = (n* (n+1))/2 + if (incx==1) then + do j = n,1,-1 + temp = x(j) + k = kk + if (noconj) then + do i = n,j + 1,-1 + temp = temp - ap(k)*x(i) + k = k - 1 + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do i = n,j + 1,-1 + temp = temp - conjg(ap(k))*x(i) + k = k - 1 + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(j) = temp + kk = kk - (n-j+1) + end do + else + kx = kx + (n-1)*incx + jx = kx + do j = n,1,-1 + temp = x(jx) + ix = kx + if (noconj) then + do k = kk,kk - (n- (j+1)),-1 + temp = temp - ap(k)*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/ap(kk-n+j) + else + do k = kk,kk - (n- (j+1)),-1 + temp = temp - conjg(ap(k))*x(ix) + ix = ix - incx + end do + if (nounit) temp = temp/conjg(ap(kk-n+j)) + end if + x(jx) = temp + jx = jx - incx + kk = kk - (n-j+1) + end do + end if + end if + end if + return + end subroutine stdlib_ztpsv + + !> ZTRMM: performs one of the matrix-matrix operations + !> B := alpha*op( A )*B, or B := alpha*B*op( A ) + !> where alpha is a scalar, B is an m by n matrix, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + + pure subroutine stdlib_ztrmm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda ZTRMV: performs one of the matrix-vector operations + !> x := A*x, or x := A**T*x, or x := A**H*x, + !> where x is an n element vector and A is an n by n unit, or non-unit, + !> upper or lower triangular matrix. + + pure subroutine stdlib_ztrmv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda ZTRSM: solves one of the matrix equations + !> op( A )*X = alpha*B, or X*op( A ) = alpha*B, + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T or op( A ) = A**H. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_ztrsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb) + ! -- reference blas level3 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: alpha + integer(ilp), intent(in) :: lda, ldb, m, n + character, intent(in) :: diag, side, transa, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: conjg,max + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, j, k, nrowa + logical(lk) :: lside, noconj, nounit, upper + + + ! test the input parameters. + lside = stdlib_lsame(side,'L') + if (lside) then + nrowa = m + else + nrowa = n + end if + noconj = stdlib_lsame(transa,'T') + nounit = stdlib_lsame(diag,'N') + upper = stdlib_lsame(uplo,'U') + info = 0 + if ((.not.lside) .and. (.not.stdlib_lsame(side,'R'))) then + info = 1 + else if ((.not.upper) .and. (.not.stdlib_lsame(uplo,'L'))) then + info = 2 + else if ((.not.stdlib_lsame(transa,'N')) .and.(.not.stdlib_lsame(transa,'T')) .and.(& + .not.stdlib_lsame(transa,'C'))) then + info = 3 + else if ((.not.stdlib_lsame(diag,'U')) .and. (.not.stdlib_lsame(diag,'N'))) & + then + info = 4 + else if (m<0) then + info = 5 + else if (n<0) then + info = 6 + else if (lda ZTRSV: solves one of the systems of equations + !> A*x = b, or A**T*x = b, or A**H*x = b, + !> where b and x are n element vectors and A is an n by n unit, or + !> non-unit, upper or lower triangular matrix. + !> No test for singularity or near-singularity is included in this + !> routine. Such tests must be performed before calling this routine. + + pure subroutine stdlib_ztrsv(uplo,trans,diag,n,a,lda,x,incx) + ! -- reference blas level2 routine -- + ! -- reference blas is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, lda, n + character, intent(in) :: diag, trans, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: temp + integer(ilp) :: i, info, ix, j, jx, kx + logical(lk) :: noconj, nounit + ! Intrinsic Functions + intrinsic :: conjg,max + ! test the input parameters. + info = 0 + if (.not.stdlib_lsame(uplo,'U') .and. .not.stdlib_lsame(uplo,'L')) then + info = 1 + else if (.not.stdlib_lsame(trans,'N') .and. .not.stdlib_lsame(trans,'T') & + .and..not.stdlib_lsame(trans,'C')) then + info = 2 + else if (.not.stdlib_lsame(diag,'U') .and. .not.stdlib_lsame(diag,'N')) then + info = 3 + else if (n<0) then + info = 4 + else if (lda BBCSD: computes the CS decomposition of a unitary matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See CUNCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The unitary matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + interface bbcsd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, & + u1, ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d,& + b22e, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,lrwork,m,p,q + real(sp), intent(out) :: b11d(*),b11e(*),b12d(*),b12e(*),b21d(*),b21e(*),b22d(& + *),b22e(*),rwork(*) + real(sp), intent(inout) :: phi(*),theta(*) + complex(sp), intent(inout) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(ldv2t,*) + + end subroutine cbbcsd +#else + module procedure stdlib_cbbcsd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, & + u1, ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d,& + b22e, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,lwork,m,p,q + real(dp), intent(out) :: b11d(*),b11e(*),b12d(*),b12e(*),b21d(*),b21e(*),b22d(& + *),b22e(*),work(*) + real(dp), intent(inout) :: phi(*),theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),& + v2t(ldv2t,*) + end subroutine dbbcsd +#else + module procedure stdlib_dbbcsd +#endif +#:if WITH_QP + module procedure stdlib_qbbcsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, & + u1, ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d,& + b22e, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,lwork,m,p,q + real(sp), intent(out) :: b11d(*),b11e(*),b12d(*),b12e(*),b21d(*),b21e(*),b22d(& + *),b22e(*),work(*) + real(sp), intent(inout) :: phi(*),theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),& + v2t(ldv2t,*) + end subroutine sbbcsd +#else + module procedure stdlib_sbbcsd +#endif +#:if WITH_QP + module procedure stdlib_wbbcsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, & + u1, ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d,& + b22e, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,lrwork,m,p,q + real(dp), intent(out) :: b11d(*),b11e(*),b12d(*),b12e(*),b21d(*),b21e(*),b22d(& + *),b22e(*),rwork(*) + real(dp), intent(inout) :: phi(*),theta(*) + complex(dp), intent(inout) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(ldv2t,*) + + end subroutine zbbcsd +#else + module procedure stdlib_zbbcsd +#endif + end interface bbcsd + + + + !> BDSDC: computes the singular value decomposition (SVD) of a real + !> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + !> using a divide and conquer method, where S is a diagonal matrix + !> with non-negative diagonal elements (the singular values of B), and + !> U and VT are orthogonal matrices of left and right singular vectors, + !> respectively. BDSDC can be used to compute all singular values, + !> and optionally, singular vectors or singular vectors in compact form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLASD3 for details. + !> The code currently calls DLASDQ if singular values only are desired. + !> However, it can be slightly modified to compute singular values + !> using the divide and conquer method. + interface bdsdc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dbdsdc( uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,uplo + integer(ilp), intent(out) :: info,iq(*),iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: q(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine dbdsdc +#else + module procedure stdlib_dbdsdc +#endif +#:if WITH_QP + module procedure stdlib_qbdsdc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sbdsdc( uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,uplo + integer(ilp), intent(out) :: info,iq(*),iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: q(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine sbdsdc +#else + module procedure stdlib_sbdsdc +#endif + end interface bdsdc + + + + !> BDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**H + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**H*VT instead of + !> P**H, for given complex input matrices U and VT. When U and VT are + !> the unitary matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by CGEBRD, then + !> A = (U*Q) * S * (P**H*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**H*C + !> for a given complex input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + interface bdsqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: c(ldc,*),u(ldu,*),vt(ldvt,*) + end subroutine cbdsqr +#else + module procedure stdlib_cbdsqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, & + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru + real(dp), intent(inout) :: c(ldc,*),d(*),e(*),u(ldu,*),vt(ldvt,*) + real(dp), intent(out) :: work(*) + end subroutine dbdsqr +#else + module procedure stdlib_dbdsqr +#endif +#:if WITH_QP + module procedure stdlib_qbdsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, & + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru + real(sp), intent(inout) :: c(ldc,*),d(*),e(*),u(ldu,*),vt(ldvt,*) + real(sp), intent(out) :: work(*) + end subroutine sbdsqr +#else + module procedure stdlib_sbdsqr +#endif +#:if WITH_QP + module procedure stdlib_wbdsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: c(ldc,*),u(ldu,*),vt(ldvt,*) + end subroutine zbdsqr +#else + module procedure stdlib_zbdsqr +#endif + end interface bdsqr + + + + !> DISNA: computes the reciprocal condition numbers for the eigenvectors + !> of a real symmetric or complex Hermitian matrix or for the left or + !> right singular vectors of a general m-by-n matrix. The reciprocal + !> condition number is the 'gap' between the corresponding eigenvalue or + !> singular value and the nearest other one. + !> The bound on the error, measured by angle in radians, in the I-th + !> computed vector is given by + !> DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) + !> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed + !> to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of + !> the error bound. + !> DISNA may also be used to compute error bounds for eigenvectors of + !> the generalized symmetric definite eigenproblem. + interface disna +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ddisna( job, m, n, d, sep, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: m,n + real(dp), intent(in) :: d(*) + real(dp), intent(out) :: sep(*) + end subroutine ddisna +#else + module procedure stdlib_ddisna +#endif +#:if WITH_QP + module procedure stdlib_qdisna +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sdisna( job, m, n, d, sep, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: m,n + real(sp), intent(in) :: d(*) + real(sp), intent(out) :: sep(*) + end subroutine sdisna +#else + module procedure stdlib_sdisna +#endif + end interface disna + + + + !> GBBRD: reduces a complex general m-by-n band matrix A to real upper + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> The routine computes B, and optionally forms Q or P**H, or computes + !> Q**H*C for a given matrix C. + interface gbbrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, & + c, ldc, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldc,ldpt,ldq,m,n,ncc + real(sp), intent(out) :: d(*),e(*),rwork(*) + complex(sp), intent(inout) :: ab(ldab,*),c(ldc,*) + complex(sp), intent(out) :: pt(ldpt,*),q(ldq,*),work(*) + end subroutine cgbbrd +#else + module procedure stdlib_cgbbrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, & + c, ldc, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldc,ldpt,ldq,m,n,ncc + real(dp), intent(inout) :: ab(ldab,*),c(ldc,*) + real(dp), intent(out) :: d(*),e(*),pt(ldpt,*),q(ldq,*),work(*) + end subroutine dgbbrd +#else + module procedure stdlib_dgbbrd +#endif +#:if WITH_QP + module procedure stdlib_qgbbrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, & + c, ldc, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldc,ldpt,ldq,m,n,ncc + real(sp), intent(inout) :: ab(ldab,*),c(ldc,*) + real(sp), intent(out) :: d(*),e(*),pt(ldpt,*),q(ldq,*),work(*) + end subroutine sgbbrd +#else + module procedure stdlib_sgbbrd +#endif +#:if WITH_QP + module procedure stdlib_wgbbrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, & + c, ldc, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldc,ldpt,ldq,m,n,ncc + real(dp), intent(out) :: d(*),e(*),rwork(*) + complex(dp), intent(inout) :: ab(ldab,*),c(ldc,*) + complex(dp), intent(out) :: pt(ldpt,*),q(ldq,*),work(*) + end subroutine zgbbrd +#else + module procedure stdlib_zgbbrd +#endif + end interface gbbrd + + + + !> GBCON: estimates the reciprocal of the condition number of a complex + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by CGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + interface gbcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, rwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + end subroutine cgbcon +#else + module procedure stdlib_cgbcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kl,ku,ldab,n,ipiv(*) + real(dp), intent(in) :: anorm,ab(ldab,*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dgbcon +#else + module procedure stdlib_dgbcon +#endif +#:if WITH_QP + module procedure stdlib_qgbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kl,ku,ldab,n,ipiv(*) + real(sp), intent(in) :: anorm,ab(ldab,*) + real(sp), intent(out) :: rcond,work(*) + end subroutine sgbcon +#else + module procedure stdlib_sgbcon +#endif +#:if WITH_QP + module procedure stdlib_wgbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, rwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + end subroutine zgbcon +#else + module procedure stdlib_zgbcon +#endif + end interface gbcon + + + + !> GBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + interface gbequ +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(in) :: ab(ldab,*) + end subroutine cgbequ +#else + module procedure stdlib_cgbequ +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(in) :: ab(ldab,*) + end subroutine dgbequ +#else + module procedure stdlib_dgbequ +#endif +#:if WITH_QP + module procedure stdlib_qgbequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(in) :: ab(ldab,*) + end subroutine sgbequ +#else + module procedure stdlib_sgbequ +#endif +#:if WITH_QP + module procedure stdlib_wgbequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(in) :: ab(ldab,*) + end subroutine zgbequ +#else + module procedure stdlib_zgbequ +#endif + end interface gbequ + + + + !> GBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from CGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + interface gbequb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(in) :: ab(ldab,*) + end subroutine cgbequb +#else + module procedure stdlib_cgbequb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(in) :: ab(ldab,*) + end subroutine dgbequb +#else + module procedure stdlib_dgbequb +#endif +#:if WITH_QP + module procedure stdlib_qgbequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(in) :: ab(ldab,*) + end subroutine sgbequb +#else + module procedure stdlib_sgbequb +#endif +#:if WITH_QP + module procedure stdlib_wgbequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(in) :: ab(ldab,*) + end subroutine zgbequb +#else + module procedure stdlib_zgbequb +#endif + end interface gbequb + + + + !> GBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + interface gbrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, & + x, ldx, ferr, berr, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldafb,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cgbrfs +#else + module procedure stdlib_cgbrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, & + x, ldx, ferr, berr, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldafb,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dgbrfs +#else + module procedure stdlib_dgbrfs +#endif +#:if WITH_QP + module procedure stdlib_qgbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, & + x, ldx, ferr, berr, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldafb,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine sgbrfs +#else + module procedure stdlib_sgbrfs +#endif +#:if WITH_QP + module procedure stdlib_wgbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, & + x, ldx, ferr, berr, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldafb,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zgbrfs +#else + module procedure stdlib_zgbrfs +#endif + end interface gbrfs + + + + !> GBSV: computes the solution to a complex system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + interface gbsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs + complex(sp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine cgbsv +#else + module procedure stdlib_cgbsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs + real(dp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine dgbsv +#else + module procedure stdlib_dgbsv +#endif +#:if WITH_QP + module procedure stdlib_qgbsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs + real(sp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine sgbsv +#else + module procedure stdlib_sgbsv +#endif +#:if WITH_QP + module procedure stdlib_wgbsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs + complex(dp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine zgbsv +#else + module procedure stdlib_zgbsv +#endif + end interface gbsv + + + + !> GBTRF: computes an LU factorization of a complex m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface gbtrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,m,n + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine cgbtrf +#else + module procedure stdlib_cgbtrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dgbtrf +#else + module procedure stdlib_dgbtrf +#endif +#:if WITH_QP + module procedure stdlib_qgbtrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(inout) :: ab(ldab,*) + end subroutine sgbtrf +#else + module procedure stdlib_sgbtrf +#endif +#:if WITH_QP + module procedure stdlib_wgbtrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: kl,ku,ldab,m,n + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zgbtrf +#else + module procedure stdlib_zgbtrf +#endif + end interface gbtrf + + + + !> GBTRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general band matrix A using the LU factorization computed + !> by CGBTRF. + interface gbtrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cgbtrs +#else + module procedure stdlib_cgbtrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dgbtrs +#else + module procedure stdlib_dgbtrs +#endif +#:if WITH_QP + module procedure stdlib_qgbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine sgbtrs +#else + module procedure stdlib_sgbtrs +#endif +#:if WITH_QP + module procedure stdlib_wgbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,ldab,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zgbtrs +#else + module procedure stdlib_zgbtrs +#endif + end interface gbtrs + + + + !> GEBAK: forms the right or left eigenvectors of a complex general + !> matrix by backward transformation on the computed eigenvectors of the + !> balanced matrix output by CGEBAL. + interface gebak +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: scale(*) + complex(sp), intent(inout) :: v(ldv,*) + end subroutine cgebak +#else + module procedure stdlib_cgebak +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: scale(*) + real(dp), intent(inout) :: v(ldv,*) + end subroutine dgebak +#else + module procedure stdlib_dgebak +#endif +#:if WITH_QP + module procedure stdlib_qgebak +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: v(ldv,*) + real(sp), intent(in) :: scale(*) + end subroutine sgebak +#else + module procedure stdlib_sgebak +#endif +#:if WITH_QP + module procedure stdlib_wgebak +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: scale(*) + complex(dp), intent(inout) :: v(ldv,*) + end subroutine zgebak +#else + module procedure stdlib_zgebak +#endif + end interface gebak + + + + !> GEBAL: balances a general complex matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + interface gebal +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgebal( job, n, a, lda, ilo, ihi, scale, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: scale(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine cgebal +#else + module procedure stdlib_cgebal +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgebal( job, n, a, lda, ilo, ihi, scale, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: scale(*) + end subroutine dgebal +#else + module procedure stdlib_dgebal +#endif +#:if WITH_QP + module procedure stdlib_qgebal +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgebal( job, n, a, lda, ilo, ihi, scale, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: scale(*) + end subroutine sgebal +#else + module procedure stdlib_sgebal +#endif +#:if WITH_QP + module procedure stdlib_wgebal +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgebal( job, n, a, lda, ilo, ihi, scale, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: scale(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zgebal +#else + module procedure stdlib_zgebal +#endif + end interface gebal + + + + !> GEBRD: reduces a general complex M-by-N matrix A to upper or lower + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + interface gebrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: taup(*),tauq(*),work(*) + end subroutine cgebrd +#else + module procedure stdlib_cgebrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*),e(*),taup(*),tauq(*),work(*) + end subroutine dgebrd +#else + module procedure stdlib_dgebrd +#endif +#:if WITH_QP + module procedure stdlib_qgebrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*),e(*),taup(*),tauq(*),work(*) + end subroutine sgebrd +#else + module procedure stdlib_sgebrd +#endif +#:if WITH_QP + module procedure stdlib_wgebrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: taup(*),tauq(*),work(*) + end subroutine zgebrd +#else + module procedure stdlib_zgebrd +#endif + end interface gebrd + + + + !> GECON: estimates the reciprocal of the condition number of a general + !> complex matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by CGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + interface gecon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgecon( norm, n, a, lda, anorm, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cgecon +#else + module procedure stdlib_cgecon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgecon( norm, n, a, lda, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,work(*) + real(dp), intent(inout) :: a(lda,*) + end subroutine dgecon +#else + module procedure stdlib_dgecon +#endif +#:if WITH_QP + module procedure stdlib_qgecon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgecon( norm, n, a, lda, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,work(*) + real(sp), intent(inout) :: a(lda,*) + end subroutine sgecon +#else + module procedure stdlib_sgecon +#endif +#:if WITH_QP + module procedure stdlib_wgecon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgecon( norm, n, a, lda, anorm, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zgecon +#else + module procedure stdlib_zgecon +#endif + end interface gecon + + + + !> GEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + interface geequ +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(in) :: a(lda,*) + end subroutine cgeequ +#else + module procedure stdlib_cgeequ +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(in) :: a(lda,*) + end subroutine dgeequ +#else + module procedure stdlib_dgeequ +#endif +#:if WITH_QP + module procedure stdlib_qgeequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(in) :: a(lda,*) + end subroutine sgeequ +#else + module procedure stdlib_sgeequ +#endif +#:if WITH_QP + module procedure stdlib_wgeequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(in) :: a(lda,*) + end subroutine zgeequ +#else + module procedure stdlib_zgeequ +#endif + end interface geequ + + + + !> GEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from CGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + interface geequb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(in) :: a(lda,*) + end subroutine cgeequb +#else + module procedure stdlib_cgeequb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(in) :: a(lda,*) + end subroutine dgeequb +#else + module procedure stdlib_dgeequb +#endif +#:if WITH_QP + module procedure stdlib_qgeequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(in) :: a(lda,*) + end subroutine sgeequb +#else + module procedure stdlib_sgeequb +#endif +#:if WITH_QP + module procedure stdlib_wgeequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(in) :: a(lda,*) + end subroutine zgeequb +#else + module procedure stdlib_zgeequb +#endif + end interface geequb + + + + !> GEES: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A complex matrix is in Schur form if it is upper triangular. + interface gees +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgees( jobvs, sort, select, n, a, lda, sdim, w, vs,ldvs, work, lwork, & + rwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_select_c + implicit none(type,external) + character, intent(in) :: jobvs,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldvs,lwork,n + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vs(ldvs,*),w(*),work(*) + procedure(stdlib_select_c) :: select + end subroutine cgees +#else + module procedure stdlib_cgees +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgees( jobvs, sort, select, n, a, lda, sdim, wr, wi,vs, ldvs, work, & + lwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_select_d + implicit none(type,external) + character, intent(in) :: jobvs,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldvs,lwork,n + logical(lk), intent(out) :: bwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: vs(ldvs,*),wi(*),work(*),wr(*) + procedure(stdlib_select_d) :: select + end subroutine dgees +#else + module procedure stdlib_dgees +#endif +#:if WITH_QP + module procedure stdlib_qgees +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgees( jobvs, sort, select, n, a, lda, sdim, wr, wi,vs, ldvs, work, & + lwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_select_s + implicit none(type,external) + character, intent(in) :: jobvs,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldvs,lwork,n + logical(lk), intent(out) :: bwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: vs(ldvs,*),wi(*),work(*),wr(*) + procedure(stdlib_select_s) :: select + end subroutine sgees +#else + module procedure stdlib_sgees +#endif +#:if WITH_QP + module procedure stdlib_wgees +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgees( jobvs, sort, select, n, a, lda, sdim, w, vs,ldvs, work, lwork, & + rwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_select_z + implicit none(type,external) + character, intent(in) :: jobvs,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldvs,lwork,n + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vs(ldvs,*),w(*),work(*) + procedure(stdlib_select_z) :: select + end subroutine zgees +#else + module procedure stdlib_zgees +#endif + end interface gees + + + + !> GEEV: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + interface geev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgeev( jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr,work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldvl,ldvr,lwork,n + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vl(ldvl,*),vr(ldvr,*),w(*),work(*) + end subroutine cgeev +#else + module procedure stdlib_cgeev +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgeev( jobvl, jobvr, n, a, lda, wr, wi, vl, ldvl, vr,ldvr, work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldvl,ldvr,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: vl(ldvl,*),vr(ldvr,*),wi(*),work(*),wr(*) + end subroutine dgeev +#else + module procedure stdlib_dgeev +#endif +#:if WITH_QP + module procedure stdlib_qgeev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgeev( jobvl, jobvr, n, a, lda, wr, wi, vl, ldvl, vr,ldvr, work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldvl,ldvr,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: vl(ldvl,*),vr(ldvr,*),wi(*),work(*),wr(*) + end subroutine sgeev +#else + module procedure stdlib_sgeev +#endif +#:if WITH_QP + module procedure stdlib_wgeev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgeev( jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr,work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldvl,ldvr,lwork,n + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vl(ldvl,*),vr(ldvr,*),w(*),work(*) + end subroutine zgeev +#else + module procedure stdlib_zgeev +#endif + end interface geev + + + + !> GEHRD: reduces a complex general matrix A to upper Hessenberg form H by + !> an unitary similarity transformation: Q**H * A * Q = H . + interface gehrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgehrd +#else + module procedure stdlib_cgehrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgehrd +#else + module procedure stdlib_dgehrd +#endif +#:if WITH_QP + module procedure stdlib_qgehrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgehrd +#else + module procedure stdlib_sgehrd +#endif +#:if WITH_QP + module procedure stdlib_wgehrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgehrd +#else + module procedure stdlib_zgehrd +#endif + end interface gehrd + + + + !> GEJSV: computes the singular value decomposition (SVD) of a complex M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^*, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and + !> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + interface gejsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, & + ldu, v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldv,lwork,lrwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*),v(ldv,*),cwork(lwork) + real(sp), intent(out) :: sva(n),rwork(lrwork) + character, intent(in) :: joba,jobp,jobr,jobt,jobu,jobv + end subroutine cgejsv +#else + module procedure stdlib_cgejsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, & + ldu, v, ldv,work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldv,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: sva(n),u(ldu,*),v(ldv,*),work(lwork) + character, intent(in) :: joba,jobp,jobr,jobt,jobu,jobv + end subroutine dgejsv +#else + module procedure stdlib_dgejsv +#endif +#:if WITH_QP + module procedure stdlib_qgejsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, & + ldu, v, ldv,work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldv,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: sva(n),u(ldu,*),v(ldv,*),work(lwork) + character, intent(in) :: joba,jobp,jobr,jobt,jobu,jobv + end subroutine sgejsv +#else + module procedure stdlib_sgejsv +#endif +#:if WITH_QP + module procedure stdlib_wgejsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, & + ldu, v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldv,lwork,lrwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*),v(ldv,*),cwork(lwork) + real(dp), intent(out) :: sva(n),rwork(lrwork) + character, intent(in) :: joba,jobp,jobr,jobt,jobu,jobv + end subroutine zgejsv +#else + module procedure stdlib_zgejsv +#endif + end interface gejsv + + + + !> GELQ: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + interface gelq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgelq( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(*),work(*) + end subroutine cgelq +#else + module procedure stdlib_cgelq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgelq( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(*),work(*) + end subroutine dgelq +#else + module procedure stdlib_dgelq +#endif +#:if WITH_QP + module procedure stdlib_qgelq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgelq( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(*),work(*) + end subroutine sgelq +#else + module procedure stdlib_sgelq +#endif +#:if WITH_QP + module procedure stdlib_wgelq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgelq( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(*),work(*) + end subroutine zgelq +#else + module procedure stdlib_zgelq +#endif + end interface gelq + + + + !> GELQF: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + interface gelqf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgelqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgelqf +#else + module procedure stdlib_cgelqf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgelqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgelqf +#else + module procedure stdlib_dgelqf +#endif +#:if WITH_QP + module procedure stdlib_qgelqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgelqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgelqf +#else + module procedure stdlib_sgelqf +#endif +#:if WITH_QP + module procedure stdlib_wgelqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgelqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgelqf +#else + module procedure stdlib_zgelqf +#endif + end interface gelqf + + + + !> GELQT: computes a blocked LQ factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + interface gelqt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgelqt( m, n, mb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,mb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*),work(*) + end subroutine cgelqt +#else + module procedure stdlib_cgelqt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgelqt( m, n, mb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,mb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*),work(*) + end subroutine dgelqt +#else + module procedure stdlib_dgelqt +#endif +#:if WITH_QP + module procedure stdlib_qgelqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgelqt( m, n, mb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,mb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*),work(*) + end subroutine sgelqt +#else + module procedure stdlib_sgelqt +#endif +#:if WITH_QP + module procedure stdlib_wgelqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgelqt( m, n, mb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,mb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*),work(*) + end subroutine zgelqt +#else + module procedure stdlib_zgelqt +#endif + end interface gelqt + + + + !> GELQT3: recursively computes a LQ factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + interface gelqt3 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine cgelqt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + end subroutine cgelqt3 +#else + module procedure stdlib_cgelqt3 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine dgelqt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + end subroutine dgelqt3 +#else + module procedure stdlib_dgelqt3 +#endif +#:if WITH_QP + module procedure stdlib_qgelqt3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine sgelqt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + end subroutine sgelqt3 +#else + module procedure stdlib_sgelqt3 +#endif +#:if WITH_QP + module procedure stdlib_wgelqt3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine zgelqt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + end subroutine zgelqt3 +#else + module procedure stdlib_zgelqt3 +#endif + end interface gelqt3 + + + + !> GELS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, or its conjugate-transpose, using a QR + !> or LQ factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an underdetermined system A**H * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**H * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + interface gels +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine cgels +#else + module procedure stdlib_cgels +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dgels +#else + module procedure stdlib_dgels +#endif +#:if WITH_QP + module procedure stdlib_qgels +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine sgels +#else + module procedure stdlib_sgels +#endif +#:if WITH_QP + module procedure stdlib_wgels +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zgels +#else + module procedure stdlib_zgels +#endif + end interface gels + + + + !> GELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface gelsd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + real(sp), intent(out) :: rwork(*),s(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine cgelsd +#else + module procedure stdlib_cgelsd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: s(*),work(*) + end subroutine dgelsd +#else + module procedure stdlib_dgelsd +#endif +#:if WITH_QP + module procedure stdlib_qgelsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond,rank, work, lwork, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: s(*),work(*) + end subroutine sgelsd +#else + module procedure stdlib_sgelsd +#endif +#:if WITH_QP + module procedure stdlib_wgelsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + real(dp), intent(out) :: rwork(*),s(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zgelsd +#else + module procedure stdlib_zgelsd +#endif + end interface gelsd + + + + !> GELSS: computes the minimum norm solution to a complex linear + !> least squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + interface gelss +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + real(sp), intent(out) :: rwork(*),s(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine cgelss +#else + module procedure stdlib_cgelss +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: s(*),work(*) + end subroutine dgelss +#else + module procedure stdlib_dgelss +#endif +#:if WITH_QP + module procedure stdlib_qgelss +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: s(*),work(*) + end subroutine sgelss +#else + module procedure stdlib_sgelss +#endif +#:if WITH_QP + module procedure stdlib_wgelss +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + real(dp), intent(out) :: rwork(*),s(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zgelss +#else + module procedure stdlib_zgelss +#endif + end interface gelss + + + + !> GELSY: computes the minimum-norm solution to a complex linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by unitary transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**H [ inv(T11)*Q1**H*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + interface gelsy +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine cgelsy +#else + module procedure stdlib_cgelsy +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dgelsy +#else + module procedure stdlib_dgelsy +#endif +#:if WITH_QP + module procedure stdlib_qgelsy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(in) :: rcond + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine sgelsy +#else + module procedure stdlib_sgelsy +#endif +#:if WITH_QP + module procedure stdlib_wgelsy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,rank + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(in) :: rcond + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zgelsy +#else + module procedure stdlib_zgelsy +#endif + end interface gelsy + + + + !> GEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by short wide + !> LQ factorization (CGELQ) + interface gemlq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + complex(sp), intent(in) :: a(lda,*),t(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine cgemlq +#else + module procedure stdlib_cgemlq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + real(dp), intent(in) :: a(lda,*),t(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dgemlq +#else + module procedure stdlib_dgemlq +#endif +#:if WITH_QP + module procedure stdlib_qgemlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + real(sp), intent(in) :: a(lda,*),t(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine sgemlq +#else + module procedure stdlib_sgemlq +#endif +#:if WITH_QP + module procedure stdlib_wgemlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + complex(dp), intent(in) :: a(lda,*),t(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zgemlq +#else + module procedure stdlib_zgemlq +#endif + end interface gemlq + + + + !> GEMLQT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex unitary matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by CGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + interface gemlqt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,mb,ldt + complex(sp), intent(in) :: v(ldv,*),t(ldt,*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine cgemlqt +#else + module procedure stdlib_cgemlqt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,mb,ldt + real(dp), intent(in) :: v(ldv,*),t(ldt,*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dgemlqt +#else + module procedure stdlib_dgemlqt +#endif +#:if WITH_QP + module procedure stdlib_qgemlqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,mb,ldt + real(sp), intent(in) :: v(ldv,*),t(ldt,*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine sgemlqt +#else + module procedure stdlib_sgemlqt +#endif +#:if WITH_QP + module procedure stdlib_wgemlqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,mb,ldt + complex(dp), intent(in) :: v(ldv,*),t(ldt,*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zgemlqt +#else + module procedure stdlib_zgemlqt +#endif + end interface gemlqt + + + + !> GEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (CGEQR) + interface gemqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + complex(sp), intent(in) :: a(lda,*),t(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine cgemqr +#else + module procedure stdlib_cgemqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + real(dp), intent(in) :: a(lda,*),t(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dgemqr +#else + module procedure stdlib_dgemqr +#endif +#:if WITH_QP + module procedure stdlib_qgemqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + real(sp), intent(in) :: a(lda,*),t(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine sgemqr +#else + module procedure stdlib_sgemqr +#endif +#:if WITH_QP + module procedure stdlib_wgemqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,tsize,lwork,ldc + complex(dp), intent(in) :: a(lda,*),t(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zgemqr +#else + module procedure stdlib_zgemqr +#endif + end interface gemqr + + + + !> GEMQRT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by CGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + interface gemqrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,nb,ldt + complex(sp), intent(in) :: v(ldv,*),t(ldt,*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine cgemqrt +#else + module procedure stdlib_cgemqrt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,nb,ldt + real(dp), intent(in) :: v(ldv,*),t(ldt,*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dgemqrt +#else + module procedure stdlib_dgemqrt +#endif +#:if WITH_QP + module procedure stdlib_qgemqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,nb,ldt + real(sp), intent(in) :: v(ldv,*),t(ldt,*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine sgemqrt +#else + module procedure stdlib_sgemqrt +#endif +#:if WITH_QP + module procedure stdlib_wgemqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,ldc,m,n,nb,ldt + complex(dp), intent(in) :: v(ldv,*),t(ldt,*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zgemqrt +#else + module procedure stdlib_zgemqrt +#endif + end interface gemqrt + + + + !> GEQLF: computes a QL factorization of a complex M-by-N matrix A: + !> A = Q * L. + interface geqlf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeqlf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgeqlf +#else + module procedure stdlib_cgeqlf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeqlf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgeqlf +#else + module procedure stdlib_dgeqlf +#endif +#:if WITH_QP + module procedure stdlib_qgeqlf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeqlf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgeqlf +#else + module procedure stdlib_sgeqlf +#endif +#:if WITH_QP + module procedure stdlib_wgeqlf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeqlf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgeqlf +#else + module procedure stdlib_zgeqlf +#endif + end interface geqlf + + + + !> GEQR: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + interface geqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(*),work(*) + end subroutine cgeqr +#else + module procedure stdlib_cgeqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(*),work(*) + end subroutine dgeqr +#else + module procedure stdlib_dgeqr +#endif +#:if WITH_QP + module procedure stdlib_qgeqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(*),work(*) + end subroutine sgeqr +#else + module procedure stdlib_sgeqr +#endif +#:if WITH_QP + module procedure stdlib_wgeqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,tsize,lwork + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(*),work(*) + end subroutine zgeqr +#else + module procedure stdlib_zgeqr +#endif + end interface geqr + + + + !> GEQR2P: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + interface geqr2p +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgeqr2p( m, n, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgeqr2p +#else + module procedure stdlib_cgeqr2p +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgeqr2p( m, n, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgeqr2p +#else + module procedure stdlib_dgeqr2p +#endif +#:if WITH_QP + module procedure stdlib_qgeqr2p +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgeqr2p( m, n, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgeqr2p +#else + module procedure stdlib_sgeqr2p +#endif +#:if WITH_QP + module procedure stdlib_wgeqr2p +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgeqr2p( m, n, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgeqr2p +#else + module procedure stdlib_zgeqr2p +#endif + end interface geqr2p + + + + !> GEQRF: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + interface geqrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeqrf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgeqrf +#else + module procedure stdlib_cgeqrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeqrf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgeqrf +#else + module procedure stdlib_dgeqrf +#endif +#:if WITH_QP + module procedure stdlib_qgeqrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeqrf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgeqrf +#else + module procedure stdlib_sgeqrf +#endif +#:if WITH_QP + module procedure stdlib_wgeqrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeqrf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgeqrf +#else + module procedure stdlib_zgeqrf +#endif + end interface geqrf + + + + !> CGEQR2P computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + interface geqrfp +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgeqrfp( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgeqrfp +#else + module procedure stdlib_cgeqrfp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgeqrfp( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgeqrfp +#else + module procedure stdlib_dgeqrfp +#endif +#:if WITH_QP + module procedure stdlib_qgeqrfp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgeqrfp( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgeqrfp +#else + module procedure stdlib_sgeqrfp +#endif +#:if WITH_QP + module procedure stdlib_wgeqrfp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgeqrfp( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgeqrfp +#else + module procedure stdlib_zgeqrfp +#endif + end interface geqrfp + + + + !> GEQRT: computes a blocked QR factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + interface geqrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*),work(*) + end subroutine cgeqrt +#else + module procedure stdlib_cgeqrt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*),work(*) + end subroutine dgeqrt +#else + module procedure stdlib_dgeqrt +#endif +#:if WITH_QP + module procedure stdlib_qgeqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*),work(*) + end subroutine sgeqrt +#else + module procedure stdlib_sgeqrt +#endif +#:if WITH_QP + module procedure stdlib_wgeqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*),work(*) + end subroutine zgeqrt +#else + module procedure stdlib_zgeqrt +#endif + end interface geqrt + + + + !> GEQRT2: computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + interface geqrt2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgeqrt2( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + end subroutine cgeqrt2 +#else + module procedure stdlib_cgeqrt2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgeqrt2( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + end subroutine dgeqrt2 +#else + module procedure stdlib_dgeqrt2 +#endif +#:if WITH_QP + module procedure stdlib_qgeqrt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgeqrt2( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + end subroutine sgeqrt2 +#else + module procedure stdlib_sgeqrt2 +#endif +#:if WITH_QP + module procedure stdlib_wgeqrt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgeqrt2( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + end subroutine zgeqrt2 +#else + module procedure stdlib_zgeqrt2 +#endif + end interface geqrt2 + + + + !> GEQRT3: recursively computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + interface geqrt3 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine cgeqrt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + end subroutine cgeqrt3 +#else + module procedure stdlib_cgeqrt3 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine dgeqrt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + end subroutine dgeqrt3 +#else + module procedure stdlib_dgeqrt3 +#endif +#:if WITH_QP + module procedure stdlib_qgeqrt3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine sgeqrt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + end subroutine sgeqrt3 +#else + module procedure stdlib_sgeqrt3 +#endif +#:if WITH_QP + module procedure stdlib_wgeqrt3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine zgeqrt3( m, n, a, lda, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,ldt + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + end subroutine zgeqrt3 +#else + module procedure stdlib_zgeqrt3 +#endif + end interface geqrt3 + + + + !> GERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + interface gerfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cgerfs +#else + module procedure stdlib_cgerfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dgerfs +#else + module procedure stdlib_dgerfs +#endif +#:if WITH_QP + module procedure stdlib_qgerfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine sgerfs +#else + module procedure stdlib_sgerfs +#endif +#:if WITH_QP + module procedure stdlib_wgerfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zgerfs +#else + module procedure stdlib_zgerfs +#endif + end interface gerfs + + + + !> GERQF: computes an RQ factorization of a complex M-by-N matrix A: + !> A = R * Q. + interface gerqf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgerqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine cgerqf +#else + module procedure stdlib_cgerqf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgerqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dgerqf +#else + module procedure stdlib_dgerqf +#endif +#:if WITH_QP + module procedure stdlib_qgerqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgerqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine sgerqf +#else + module procedure stdlib_sgerqf +#endif +#:if WITH_QP + module procedure stdlib_wgerqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgerqf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zgerqf +#else + module procedure stdlib_zgerqf +#endif + end interface gerqf + + + + !> GESDD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors, by using divide-and-conquer method. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**H, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface gesdd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(sp), intent(out) :: rwork(*),s(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine cgesdd +#else + module procedure stdlib_cgesdd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: s(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine dgesdd +#else + module procedure stdlib_dgesdd +#endif +#:if WITH_QP + module procedure stdlib_qgesdd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: s(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine sgesdd +#else + module procedure stdlib_sgesdd +#endif +#:if WITH_QP + module procedure stdlib_wgesdd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(dp), intent(out) :: rwork(*),s(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine zgesdd +#else + module procedure stdlib_zgesdd +#endif + end interface gesdd + + + + !> GESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + interface gesv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine cgesv +#else + module procedure stdlib_cgesv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine dgesv +#else + module procedure stdlib_dgesv +#endif +#:if WITH_QP + module procedure stdlib_qgesv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine sgesv +#else + module procedure stdlib_sgesv +#endif +#:if WITH_QP + module procedure stdlib_wgesv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine zgesv +#else + module procedure stdlib_zgesv +#endif + end interface gesv + + + + !> GESVD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**H, not V. + interface gesvd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu,jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(sp), intent(out) :: rwork(*),s(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine cgesvd +#else + module procedure stdlib_cgesvd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu,jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: s(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine dgesvd +#else + module procedure stdlib_dgesvd +#endif +#:if WITH_QP + module procedure stdlib_qgesvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu,jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: s(*),u(ldu,*),vt(ldvt,*),work(*) + end subroutine sgesvd +#else + module procedure stdlib_sgesvd +#endif +#:if WITH_QP + module procedure stdlib_wgesvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu,jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldu,ldvt,lwork,m,n + real(dp), intent(out) :: rwork(*),s(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine zgesvd +#else + module procedure stdlib_zgesvd +#endif + end interface gesvd + + + + !> GESVDQ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + interface gesvdq +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: joba,jobp,jobr,jobu,jobv + integer(ilp), intent(in) :: m,n,lda,ldu,ldv,liwork,lrwork + integer(ilp), intent(out) :: numrank,info,iwork(*) + integer(ilp), intent(inout) :: lcwork + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*),v(ldv,*),cwork(*) + real(sp), intent(out) :: s(*),rwork(*) + end subroutine cgesvdq +#else + module procedure stdlib_cgesvdq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: joba,jobp,jobr,jobu,jobv + integer(ilp), intent(in) :: m,n,lda,ldu,ldv,liwork,lrwork + integer(ilp), intent(out) :: numrank,info,iwork(*) + integer(ilp), intent(inout) :: lwork + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: u(ldu,*),v(ldv,*),work(*),s(*),rwork(*) + end subroutine dgesvdq +#else + module procedure stdlib_dgesvdq +#endif +#:if WITH_QP + module procedure stdlib_qgesvdq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: joba,jobp,jobr,jobu,jobv + integer(ilp), intent(in) :: m,n,lda,ldu,ldv,liwork,lrwork + integer(ilp), intent(out) :: numrank,info,iwork(*) + integer(ilp), intent(inout) :: lwork + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: u(ldu,*),v(ldv,*),work(*),s(*),rwork(*) + end subroutine sgesvdq +#else + module procedure stdlib_sgesvdq +#endif +#:if WITH_QP + module procedure stdlib_wgesvdq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: joba,jobp,jobr,jobu,jobv + integer(ilp), intent(in) :: m,n,lda,ldu,ldv,liwork,lrwork + integer(ilp), intent(out) :: numrank,info,iwork(*) + integer(ilp), intent(inout) :: lcwork + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*),v(ldv,*),cwork(*) + real(dp), intent(out) :: s(*),rwork(*) + end subroutine zgesvdq +#else + module procedure stdlib_zgesvdq +#endif + end interface gesvdq + + + + !> GESVJ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + interface gesvj +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, & + lwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,lrwork,m,mv,n + character, intent(in) :: joba,jobu,jobv + complex(sp), intent(inout) :: a(lda,*),v(ldv,*),cwork(lwork) + real(sp), intent(inout) :: rwork(lrwork) + real(sp), intent(out) :: sva(n) + end subroutine cgesvj +#else + module procedure stdlib_cgesvj +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n + character, intent(in) :: joba,jobu,jobv + real(dp), intent(inout) :: a(lda,*),v(ldv,*),work(lwork) + real(dp), intent(out) :: sva(n) + end subroutine dgesvj +#else + module procedure stdlib_dgesvj +#endif +#:if WITH_QP + module procedure stdlib_qgesvj +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n + character, intent(in) :: joba,jobu,jobv + real(sp), intent(inout) :: a(lda,*),v(ldv,*),work(lwork) + real(sp), intent(out) :: sva(n) + end subroutine sgesvj +#else + module procedure stdlib_sgesvj +#endif +#:if WITH_QP + module procedure stdlib_wgesvj +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, & + lwork, rwork, lrwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,lrwork,m,mv,n + character, intent(in) :: joba,jobu,jobv + complex(dp), intent(inout) :: a(lda,*),v(ldv,*),cwork(lwork) + real(dp), intent(inout) :: rwork(lrwork) + real(dp), intent(out) :: sva(n) + end subroutine zgesvj +#else + module procedure stdlib_zgesvj +#endif + end interface gesvj + + + + !> GETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + interface getrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgetrf( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cgetrf +#else + module procedure stdlib_cgetrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgetrf( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dgetrf +#else + module procedure stdlib_dgetrf +#endif +#:if WITH_QP + module procedure stdlib_qgetrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgetrf( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + end subroutine sgetrf +#else + module procedure stdlib_sgetrf +#endif +#:if WITH_QP + module procedure stdlib_wgetrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgetrf( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zgetrf +#else + module procedure stdlib_zgetrf +#endif + end interface getrf + + + + !> GETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + interface getrf2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine cgetrf2( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cgetrf2 +#else + module procedure stdlib_cgetrf2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine dgetrf2( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dgetrf2 +#else + module procedure stdlib_dgetrf2 +#endif +#:if WITH_QP + module procedure stdlib_qgetrf2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine sgetrf2( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + end subroutine sgetrf2 +#else + module procedure stdlib_sgetrf2 +#endif +#:if WITH_QP + module procedure stdlib_wgetrf2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine zgetrf2( m, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zgetrf2 +#else + module procedure stdlib_zgetrf2 +#endif + end interface getrf2 + + + + !> GETRI: computes the inverse of a matrix using the LU factorization + !> computed by CGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + interface getri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgetri( n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cgetri +#else + module procedure stdlib_cgetri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgetri( n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dgetri +#else + module procedure stdlib_dgetri +#endif +#:if WITH_QP + module procedure stdlib_qgetri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgetri( n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine sgetri +#else + module procedure stdlib_sgetri +#endif +#:if WITH_QP + module procedure stdlib_wgetri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgetri( n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zgetri +#else + module procedure stdlib_zgetri +#endif + end interface getri + + + + !> GETRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by CGETRF. + interface getrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cgetrs +#else + module procedure stdlib_cgetrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dgetrs +#else + module procedure stdlib_dgetrs +#endif +#:if WITH_QP + module procedure stdlib_qgetrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine sgetrs +#else + module procedure stdlib_sgetrs +#endif +#:if WITH_QP + module procedure stdlib_wgetrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zgetrs +#else + module procedure stdlib_zgetrs +#endif + end interface getrs + + + + !> GETSLS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + interface getsls +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine cgetsls +#else + module procedure stdlib_cgetsls +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dgetsls +#else + module procedure stdlib_dgetsls +#endif +#:if WITH_QP + module procedure stdlib_qgetsls +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine sgetsls +#else + module procedure stdlib_sgetsls +#endif +#:if WITH_QP + module procedure stdlib_wgetsls +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zgetsls +#else + module procedure stdlib_zgetsls +#endif + end interface getsls + + + + !> GETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a complex M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in CGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of CGEQRT for more details on the format. + interface getsqrhrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,nb1,nb2,mb1 + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*),work(*) + end subroutine cgetsqrhrt +#else + module procedure stdlib_cgetsqrhrt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,nb1,nb2,mb1 + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*),work(*) + end subroutine dgetsqrhrt +#else + module procedure stdlib_dgetsqrhrt +#endif +#:if WITH_QP + module procedure stdlib_qgetsqrhrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,nb1,nb2,mb1 + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*),work(*) + end subroutine sgetsqrhrt +#else + module procedure stdlib_sgetsqrhrt +#endif +#:if WITH_QP + module procedure stdlib_wgetsqrhrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,nb1,nb2,mb1 + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*),work(*) + end subroutine zgetsqrhrt +#else + module procedure stdlib_zgetsqrhrt +#endif + end interface getsqrhrt + + + + !> GGBAK: forms the right or left eigenvectors of a complex generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> CGGBAL. + interface ggbak +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: lscale(*),rscale(*) + complex(sp), intent(inout) :: v(ldv,*) + end subroutine cggbak +#else + module procedure stdlib_cggbak +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: lscale(*),rscale(*) + real(dp), intent(inout) :: v(ldv,*) + end subroutine dggbak +#else + module procedure stdlib_dggbak +#endif +#:if WITH_QP + module procedure stdlib_qggbak +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: lscale(*),rscale(*) + real(sp), intent(inout) :: v(ldv,*) + end subroutine sggbak +#else + module procedure stdlib_sggbak +#endif +#:if WITH_QP + module procedure stdlib_wggbak +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job,side + integer(ilp), intent(in) :: ihi,ilo,ldv,m,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: lscale(*),rscale(*) + complex(dp), intent(inout) :: v(ldv,*) + end subroutine zggbak +#else + module procedure stdlib_zggbak +#endif + end interface ggbak + + + + !> GGBAL: balances a pair of general complex matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + interface ggbal +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,ldb,n + real(sp), intent(out) :: lscale(*),rscale(*),work(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine cggbal +#else + module procedure stdlib_cggbal +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,ldb,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: lscale(*),rscale(*),work(*) + end subroutine dggbal +#else + module procedure stdlib_dggbal +#endif +#:if WITH_QP + module procedure stdlib_qggbal +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,ldb,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: lscale(*),rscale(*),work(*) + end subroutine sggbal +#else + module procedure stdlib_sggbal +#endif +#:if WITH_QP + module procedure stdlib_wggbal +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: job + integer(ilp), intent(out) :: ihi,ilo,info + integer(ilp), intent(in) :: lda,ldb,n + real(dp), intent(out) :: lscale(*),rscale(*),work(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine zggbal +#else + module procedure stdlib_zggbal +#endif + end interface ggbal + + + + !> GGES: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> CGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + interface gges +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alpha, & + beta, vsl, ldvsl, vsr, ldvsr, work,lwork, rwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_selctg_c + implicit none(type,external) + character, intent(in) :: jobvsl,jobvsr,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldb,ldvsl,ldvsr,lwork,n + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: alpha(*),beta(*),vsl(ldvsl,*),vsr(ldvsr,*),work(*) + + procedure(stdlib_selctg_c) :: selctg + end subroutine cgges +#else + module procedure stdlib_cgges +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alphar, & + alphai, beta, vsl, ldvsl, vsr,ldvsr, work, lwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_selctg_d + implicit none(type,external) + character, intent(in) :: jobvsl,jobvsr,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldb,ldvsl,ldvsr,lwork,n + logical(lk), intent(out) :: bwork(*) + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: alphai(*),alphar(*),beta(*),vsl(ldvsl,*),vsr(ldvsr,*)& + ,work(*) + procedure(stdlib_selctg_d) :: selctg + end subroutine dgges +#else + module procedure stdlib_dgges +#endif +#:if WITH_QP + module procedure stdlib_qgges +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alphar, & + alphai, beta, vsl, ldvsl, vsr,ldvsr, work, lwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_selctg_s + implicit none(type,external) + character, intent(in) :: jobvsl,jobvsr,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldb,ldvsl,ldvsr,lwork,n + logical(lk), intent(out) :: bwork(*) + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: alphai(*),alphar(*),beta(*),vsl(ldvsl,*),vsr(ldvsr,*)& + ,work(*) + procedure(stdlib_selctg_s) :: selctg + end subroutine sgges +#else + module procedure stdlib_sgges +#endif +#:if WITH_QP + module procedure stdlib_wgges +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alpha, & + beta, vsl, ldvsl, vsr, ldvsr, work,lwork, rwork, bwork, info ) + import sp,dp,qp,ilp,lk,stdlib_selctg_z + implicit none(type,external) + character, intent(in) :: jobvsl,jobvsr,sort + integer(ilp), intent(out) :: info,sdim + integer(ilp), intent(in) :: lda,ldb,ldvsl,ldvsr,lwork,n + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: alpha(*),beta(*),vsl(ldvsl,*),vsr(ldvsr,*),work(*) + + procedure(stdlib_selctg_z) :: selctg + end subroutine zgges +#else + module procedure stdlib_zgges +#endif + end interface gges + + + + !> GGEV: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + interface ggev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cggev( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,n + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: alpha(*),beta(*),vl(ldvl,*),vr(ldvr,*),work(*) + + end subroutine cggev +#else + module procedure stdlib_cggev +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dggev( jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai,beta, vl, ldvl, & + vr, ldvr, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: alphai(*),alphar(*),beta(*),vl(ldvl,*),vr(ldvr,*),& + work(*) + end subroutine dggev +#else + module procedure stdlib_dggev +#endif +#:if WITH_QP + module procedure stdlib_qggev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sggev( jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai,beta, vl, ldvl, & + vr, ldvr, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: alphai(*),alphar(*),beta(*),vl(ldvl,*),vr(ldvr,*),& + work(*) + end subroutine sggev +#else + module procedure stdlib_sggev +#endif +#:if WITH_QP + module procedure stdlib_wggev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zggev( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobvl,jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,n + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: alpha(*),beta(*),vl(ldvl,*),vr(ldvr,*),work(*) + + end subroutine zggev +#else + module procedure stdlib_zggev +#endif + end interface ggev + + + + !> GGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + interface ggglm +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),d(*) + complex(sp), intent(out) :: work(*),x(*),y(*) + end subroutine cggglm +#else + module procedure stdlib_cggglm +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(dp), intent(inout) :: a(lda,*),b(ldb,*),d(*) + real(dp), intent(out) :: work(*),x(*),y(*) + end subroutine dggglm +#else + module procedure stdlib_dggglm +#endif +#:if WITH_QP + module procedure stdlib_qggglm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(sp), intent(inout) :: a(lda,*),b(ldb,*),d(*) + real(sp), intent(out) :: work(*),x(*),y(*) + end subroutine sggglm +#else + module procedure stdlib_sggglm +#endif +#:if WITH_QP + module procedure stdlib_wggglm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),d(*) + complex(dp), intent(out) :: work(*),x(*),y(*) + end subroutine zggglm +#else + module procedure stdlib_zggglm +#endif + end interface ggglm + + + + !> GGHRD: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the generalized + !> eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then GGHRD reduces the original + !> problem to generalized Hessenberg form. + interface gghrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz + integer(ilp), intent(in) :: ihi,ilo,lda,ldb,ldq,ldz,n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine cgghrd +#else + module procedure stdlib_cgghrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz + integer(ilp), intent(in) :: ihi,ilo,lda,ldb,ldq,ldz,n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine dgghrd +#else + module procedure stdlib_dgghrd +#endif +#:if WITH_QP + module procedure stdlib_qgghrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz + integer(ilp), intent(in) :: ihi,ilo,lda,ldb,ldq,ldz,n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine sgghrd +#else + module procedure stdlib_sgghrd +#endif +#:if WITH_QP + module procedure stdlib_wgghrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz + integer(ilp), intent(in) :: ihi,ilo,lda,ldb,ldq,ldz,n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine zgghrd +#else + module procedure stdlib_zgghrd +#endif + end interface gghrd + + + + !> GGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + interface gglse +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),c(*),d(*) + complex(sp), intent(out) :: work(*),x(*) + end subroutine cgglse +#else + module procedure stdlib_cgglse +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(dp), intent(inout) :: a(lda,*),b(ldb,*),c(*),d(*) + real(dp), intent(out) :: work(*),x(*) + end subroutine dgglse +#else + module procedure stdlib_dgglse +#endif +#:if WITH_QP + module procedure stdlib_qgglse +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(sp), intent(inout) :: a(lda,*),b(ldb,*),c(*),d(*) + real(sp), intent(out) :: work(*),x(*) + end subroutine sgglse +#else + module procedure stdlib_sgglse +#endif +#:if WITH_QP + module procedure stdlib_wgglse +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),c(*),d(*) + complex(dp), intent(out) :: work(*),x(*) + end subroutine zgglse +#else + module procedure stdlib_zgglse +#endif + end interface gglse + + + + !> GGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, + !> and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**H * (inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z' denotes the + !> conjugate transpose of matrix Z. + interface ggqrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: taua(*),taub(*),work(*) + end subroutine cggqrf +#else + module procedure stdlib_cggqrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: taua(*),taub(*),work(*) + end subroutine dggqrf +#else + module procedure stdlib_dggqrf +#endif +#:if WITH_QP + module procedure stdlib_qggqrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: taua(*),taub(*),work(*) + end subroutine sggqrf +#else + module procedure stdlib_sggqrf +#endif +#:if WITH_QP + module procedure stdlib_wggqrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: taua(*),taub(*),work(*) + end subroutine zggqrf +#else + module procedure stdlib_zggqrf +#endif + end interface ggqrf + + + + !> GGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**H + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of the matrix Z. + interface ggrqf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: taua(*),taub(*),work(*) + end subroutine cggrqf +#else + module procedure stdlib_cggrqf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: taua(*),taub(*),work(*) + end subroutine dggrqf +#else + module procedure stdlib_dggrqf +#endif +#:if WITH_QP + module procedure stdlib_qggrqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: taua(*),taub(*),work(*) + end subroutine sggrqf +#else + module procedure stdlib_sggrqf +#endif +#:if WITH_QP + module procedure stdlib_wggrqf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,lwork,m,n,p + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: taua(*),taub(*),work(*) + end subroutine zggrqf +#else + module procedure stdlib_zggrqf +#endif + end interface ggrqf + + + + !> GSVJ0: is called from CGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + interface gsvj0 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,nsweep + real(sp), intent(in) :: eps,sfmin,tol + character, intent(in) :: jobv + complex(sp), intent(inout) :: a(lda,*),d(n),v(ldv,*) + complex(sp), intent(out) :: work(lwork) + real(sp), intent(inout) :: sva(n) + end subroutine cgsvj0 +#else + module procedure stdlib_cgsvj0 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,nsweep + real(dp), intent(in) :: eps,sfmin,tol + character, intent(in) :: jobv + real(dp), intent(inout) :: a(lda,*),sva(n),d(n),v(ldv,*) + real(dp), intent(out) :: work(lwork) + end subroutine dgsvj0 +#else + module procedure stdlib_dgsvj0 +#endif +#:if WITH_QP + module procedure stdlib_qgsvj0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,nsweep + real(sp), intent(in) :: eps,sfmin,tol + character, intent(in) :: jobv + real(sp), intent(inout) :: a(lda,*),sva(n),d(n),v(ldv,*) + real(sp), intent(out) :: work(lwork) + end subroutine sgsvj0 +#else + module procedure stdlib_sgsvj0 +#endif +#:if WITH_QP + module procedure stdlib_wgsvj0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,nsweep + real(dp), intent(in) :: eps,sfmin,tol + character, intent(in) :: jobv + complex(dp), intent(inout) :: a(lda,*),d(n),v(ldv,*) + complex(dp), intent(out) :: work(lwork) + real(dp), intent(inout) :: sva(n) + end subroutine zgsvj0 +#else + module procedure stdlib_zgsvj0 +#endif + end interface gsvj0 + + + + !> GSVJ1: is called from CGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> GSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + interface gsvj1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol,& + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: eps,sfmin,tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,n1,nsweep + character, intent(in) :: jobv + complex(sp), intent(inout) :: a(lda,*),d(n),v(ldv,*) + complex(sp), intent(out) :: work(lwork) + real(sp), intent(inout) :: sva(n) + end subroutine cgsvj1 +#else + module procedure stdlib_cgsvj1 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol,& + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: eps,sfmin,tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,n1,nsweep + character, intent(in) :: jobv + real(dp), intent(inout) :: a(lda,*),d(n),sva(n),v(ldv,*) + real(dp), intent(out) :: work(lwork) + end subroutine dgsvj1 +#else + module procedure stdlib_dgsvj1 +#endif +#:if WITH_QP + module procedure stdlib_qgsvj1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol,& + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: eps,sfmin,tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,n1,nsweep + character, intent(in) :: jobv + real(sp), intent(inout) :: a(lda,*),d(n),sva(n),v(ldv,*) + real(sp), intent(out) :: work(lwork) + end subroutine sgsvj1 +#else + module procedure stdlib_sgsvj1 +#endif +#:if WITH_QP + module procedure stdlib_wgsvj1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol,& + nsweep, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: eps,sfmin,tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldv,lwork,m,mv,n,n1,nsweep + character, intent(in) :: jobv + complex(dp), intent(inout) :: a(lda,*),d(n),v(ldv,*) + complex(dp), intent(out) :: work(lwork) + real(dp), intent(inout) :: sva(n) + end subroutine zgsvj1 +#else + module procedure stdlib_zgsvj1 +#endif + end interface gsvj1 + + + + !> GTCON: estimates the reciprocal of the condition number of a complex + !> tridiagonal matrix A using the LU factorization as computed by + !> CGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface gtcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: d(*),dl(*),du(*),du2(*) + complex(sp), intent(out) :: work(*) + end subroutine cgtcon +#else + module procedure stdlib_cgtcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(in) :: anorm,d(*),dl(*),du(*),du2(*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dgtcon +#else + module procedure stdlib_dgtcon +#endif +#:if WITH_QP + module procedure stdlib_qgtcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(in) :: anorm,d(*),dl(*),du(*),du2(*) + real(sp), intent(out) :: rcond,work(*) + end subroutine sgtcon +#else + module procedure stdlib_sgtcon +#endif +#:if WITH_QP + module procedure stdlib_wgtcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: d(*),dl(*),du(*),du2(*) + complex(dp), intent(out) :: work(*) + end subroutine zgtcon +#else + module procedure stdlib_zgtcon +#endif + end interface gtcon + + + + !> GTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + interface gtrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, & + x, ldx, ferr, berr, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: b(ldb,*),d(*),df(*),dl(*),dlf(*),du(*),du2(*),duf(& + *) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cgtrfs +#else + module procedure stdlib_cgtrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, & + x, ldx, ferr, berr, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(in) :: b(ldb,*),d(*),df(*),dl(*),dlf(*),du(*),du2(*),duf(*) + + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dgtrfs +#else + module procedure stdlib_dgtrfs +#endif +#:if WITH_QP + module procedure stdlib_qgtrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, & + x, ldx, ferr, berr, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(in) :: b(ldb,*),d(*),df(*),dl(*),dlf(*),du(*),du2(*),duf(*) + + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine sgtrfs +#else + module procedure stdlib_sgtrfs +#endif +#:if WITH_QP + module procedure stdlib_wgtrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, & + x, ldx, ferr, berr, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: b(ldb,*),d(*),df(*),dl(*),dlf(*),du(*),du2(*),duf(& + *) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zgtrfs +#else + module procedure stdlib_zgtrfs +#endif + end interface gtrfs + + + + !> GTSV: solves the equation + !> A*X = B, + !> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T *X = B may be solved by interchanging the + !> order of the arguments DU and DL. + interface gtsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgtsv( n, nrhs, dl, d, du, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(inout) :: b(ldb,*),d(*),dl(*),du(*) + end subroutine cgtsv +#else + module procedure stdlib_cgtsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgtsv( n, nrhs, dl, d, du, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: b(ldb,*),d(*),dl(*),du(*) + end subroutine dgtsv +#else + module procedure stdlib_dgtsv +#endif +#:if WITH_QP + module procedure stdlib_qgtsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgtsv( n, nrhs, dl, d, du, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: b(ldb,*),d(*),dl(*),du(*) + end subroutine sgtsv +#else + module procedure stdlib_sgtsv +#endif +#:if WITH_QP + module procedure stdlib_wgtsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgtsv( n, nrhs, dl, d, du, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(inout) :: b(ldb,*),d(*),dl(*),du(*) + end subroutine zgtsv +#else + module procedure stdlib_zgtsv +#endif + end interface gtsv + + + + !> GTTRF: computes an LU factorization of a complex tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + interface gttrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgttrf( n, dl, d, du, du2, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: d(*),dl(*),du(*) + complex(sp), intent(out) :: du2(*) + end subroutine cgttrf +#else + module procedure stdlib_cgttrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgttrf( n, dl, d, du, du2, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*),dl(*),du(*) + real(dp), intent(out) :: du2(*) + end subroutine dgttrf +#else + module procedure stdlib_dgttrf +#endif +#:if WITH_QP + module procedure stdlib_qgttrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgttrf( n, dl, d, du, du2, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*),dl(*),du(*) + real(sp), intent(out) :: du2(*) + end subroutine sgttrf +#else + module procedure stdlib_sgttrf +#endif +#:if WITH_QP + module procedure stdlib_wgttrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgttrf( n, dl, d, du, du2, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: d(*),dl(*),du(*) + complex(dp), intent(out) :: du2(*) + end subroutine zgttrf +#else + module procedure stdlib_zgttrf +#endif + end interface gttrf + + + + !> GTTRS: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by CGTTRF. + interface gttrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: d(*),dl(*),du(*),du2(*) + end subroutine cgttrs +#else + module procedure stdlib_cgttrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*),dl(*),du(*),du2(*) + end subroutine dgttrs +#else + module procedure stdlib_dgttrs +#endif +#:if WITH_QP + module procedure stdlib_qgttrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*),dl(*),du(*),du2(*) + end subroutine sgttrs +#else + module procedure stdlib_sgttrs +#endif +#:if WITH_QP + module procedure stdlib_wgttrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: d(*),dl(*),du(*),du2(*) + end subroutine zgttrs +#else + module procedure stdlib_zgttrs +#endif + end interface gttrs + + + + !> HB2ST_KERNELS: is an internal routine used by the CHETRD_HB2ST + !> subroutine. + interface hb2st_kernels +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, & + lda, v, tau, ldvt, work) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype,st,ed,sweep,n,nb,ib,lda,ldvt + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: v(*),tau(*),work(*) + end subroutine chb2st_kernels +#else + module procedure stdlib_chb2st_kernels +#endif +#:if WITH_QP + module procedure stdlib_whb2st_kernels +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, & + lda, v, tau, ldvt, work) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype,st,ed,sweep,n,nb,ib,lda,ldvt + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: v(*),tau(*),work(*) + end subroutine zhb2st_kernels +#else + module procedure stdlib_zhb2st_kernels +#endif + end interface hb2st_kernels + + + + !> HBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. + interface hbev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldz,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chbev +#else + module procedure stdlib_chbev +#endif +#:if WITH_QP + module procedure stdlib_whbev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldz,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhbev +#else + module procedure stdlib_zhbev +#endif + end interface hbev + + + + !> HBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface hbevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldz,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chbevd +#else + module procedure stdlib_chbevd +#endif +#:if WITH_QP + module procedure stdlib_whbevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldz,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhbevd +#else + module procedure stdlib_zhbevd +#endif + end interface hbevd + + + + !> HBGST: reduces a complex Hermitian-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**H*S by CPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**H*A*X, where + !> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the + !> bandwidth of A. + interface hbgst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldx,n + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(in) :: bb(ldbb,*) + complex(sp), intent(out) :: work(*),x(ldx,*) + end subroutine chbgst +#else + module procedure stdlib_chbgst +#endif +#:if WITH_QP + module procedure stdlib_whbgst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldx,n + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(in) :: bb(ldbb,*) + complex(dp), intent(out) :: work(*),x(ldx,*) + end subroutine zhbgst +#else + module procedure stdlib_zhbgst +#endif + end interface hbgst + + + + !> HBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. + interface hbgv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chbgv +#else + module procedure stdlib_chbgv +#endif +#:if WITH_QP + module procedure stdlib_whbgv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhbgv +#else + module procedure stdlib_zhbgv +#endif + end interface hbgv + + + + !> HBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface hbgvd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, rwork, lrwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chbgvd +#else + module procedure stdlib_chbgvd +#endif +#:if WITH_QP + module procedure stdlib_whbgvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, rwork, lrwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhbgvd +#else + module procedure stdlib_zhbgvd +#endif + end interface hbgvd + + + + !> HBTRD: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + interface hbtrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldq,n + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: ab(ldab,*),q(ldq,*) + complex(sp), intent(out) :: work(*) + end subroutine chbtrd +#else + module procedure stdlib_chbtrd +#endif +#:if WITH_QP + module procedure stdlib_whbtrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldq,n + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: ab(ldab,*),q(ldq,*) + complex(dp), intent(out) :: work(*) + end subroutine zhbtrd +#else + module procedure stdlib_zhbtrd +#endif + end interface hbtrd + + + + !> HECON: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface hecon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine checon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine checon +#else + module procedure stdlib_checon +#endif +#:if WITH_QP + module procedure stdlib_whecon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhecon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhecon +#else + module procedure stdlib_zhecon +#endif + end interface hecon + + + + !> HECON_ROOK: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface hecon_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine checon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine checon_rook +#else + module procedure stdlib_checon_rook +#endif +#:if WITH_QP + module procedure stdlib_whecon_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhecon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhecon_rook +#else + module procedure stdlib_zhecon_rook +#endif + end interface hecon_rook + + + + !> HEEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + interface heequb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cheequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + character, intent(in) :: uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cheequb +#else + module procedure stdlib_cheequb +#endif +#:if WITH_QP + module procedure stdlib_wheequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zheequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + character, intent(in) :: uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zheequb +#else + module procedure stdlib_zheequb +#endif + end interface heequb + + + + !> HEEV: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. + interface heev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cheev( jobz, uplo, n, a, lda, w, work, lwork, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cheev +#else + module procedure stdlib_cheev +#endif +#:if WITH_QP + module procedure stdlib_wheev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zheev( jobz, uplo, n, a, lda, w, work, lwork, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zheev +#else + module procedure stdlib_zheev +#endif + end interface heev + + + + !> HEEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface heevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cheevd( jobz, uplo, n, a, lda, w, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cheevd +#else + module procedure stdlib_cheevd +#endif +#:if WITH_QP + module procedure stdlib_wheevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zheevd( jobz, uplo, n, a, lda, w, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zheevd +#else + module procedure stdlib_zheevd +#endif + end interface heevd + + + + !> HEEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> HEEVR first reduces the matrix A to tridiagonal form T with a call + !> to CHETRD. Then, whenever possible, HEEVR calls CSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. CSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see CSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : HEEVR calls CSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> HEEVR calls SSTEBZ and CSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of CSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + interface heevr +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cheevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, & + ldz, isuppz, work, lwork,rwork, lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range,uplo + integer(ilp), intent(in) :: il,iu,lda,ldz,liwork,lrwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: abstol,vl,vu + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine cheevr +#else + module procedure stdlib_cheevr +#endif +#:if WITH_QP + module procedure stdlib_wheevr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zheevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, & + ldz, isuppz, work, lwork,rwork, lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range,uplo + integer(ilp), intent(in) :: il,iu,lda,ldz,liwork,lrwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: abstol,vl,vu + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zheevr +#else + module procedure stdlib_zheevr +#endif + end interface heevr + + + + !> HEGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by CPOTRF. + interface hegst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chegst( itype, uplo, n, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,n + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine chegst +#else + module procedure stdlib_chegst +#endif +#:if WITH_QP + module procedure stdlib_whegst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhegst( itype, uplo, n, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,n + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine zhegst +#else + module procedure stdlib_zhegst +#endif + end interface hegst + + + + !> HEGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian and B is also + !> positive definite. + interface hegv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chegv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chegv +#else + module procedure stdlib_chegv +#endif +#:if WITH_QP + module procedure stdlib_whegv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhegv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhegv +#else + module procedure stdlib_zhegv +#endif + end interface hegv + + + + !> HEGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface hegvd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chegvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,lda,ldb,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chegvd +#else + module procedure stdlib_chegvd +#endif +#:if WITH_QP + module procedure stdlib_whegvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhegvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,lda,ldb,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhegvd +#else + module procedure stdlib_zhegvd +#endif + end interface hegvd + + + + !> HERFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite, and + !> provides error bounds and backward error estimates for the solution. + interface herfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cherfs +#else + module procedure stdlib_cherfs +#endif +#:if WITH_QP + module procedure stdlib_wherfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zherfs +#else + module procedure stdlib_zherfs +#endif + end interface herfs + + + + !> HESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + interface hesv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chesv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chesv +#else + module procedure stdlib_chesv +#endif +#:if WITH_QP + module procedure stdlib_whesv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhesv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhesv +#else + module procedure stdlib_zhesv +#endif + end interface hesv + + + + !> HESV_AA: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**H * T * U, if UPLO = 'U', or + !> A = L * T * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is Hermitian and tridiagonal. The factored form + !> of A is then used to solve the system of equations A * X = B. + interface hesv_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chesv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chesv_aa +#else + module procedure stdlib_chesv_aa +#endif +#:if WITH_QP + module procedure stdlib_whesv_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhesv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhesv_aa +#else + module procedure stdlib_zhesv_aa +#endif + end interface hesv_aa + + + + !> HESV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> CHETRF_RK is called to compute the factorization of a complex + !> Hermitian matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine CHETRS_3. + interface hesv_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chesv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: e(*),work(*) + end subroutine chesv_rk +#else + module procedure stdlib_chesv_rk +#endif +#:if WITH_QP + module procedure stdlib_whesv_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhesv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: e(*),work(*) + end subroutine zhesv_rk +#else + module procedure stdlib_zhesv_rk +#endif + end interface hesv_rk + + + + !> HESV_ROOK: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used + !> to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> CHETRF_ROOK is called to compute the factorization of a complex + !> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling CHETRS_ROOK (uses BLAS 2). + interface hesv_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chesv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chesv_rook +#else + module procedure stdlib_chesv_rook +#endif +#:if WITH_QP + module procedure stdlib_whesv_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhesv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhesv_rook +#else + module procedure stdlib_zhesv_rook +#endif + end interface hesv_rook + + + + !> HESWAPR: applies an elementary permutation on the rows and the columns of + !> a hermitian matrix. + interface heswapr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cheswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + complex(sp), intent(inout) :: a(lda,n) + end subroutine cheswapr +#else + module procedure stdlib_cheswapr +#endif +#:if WITH_QP + module procedure stdlib_wheswapr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zheswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + complex(dp), intent(inout) :: a(lda,n) + end subroutine zheswapr +#else + module procedure stdlib_zheswapr +#endif + end interface heswapr + + + + !> HETF2_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + interface hetf2_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + end subroutine chetf2_rk +#else + module procedure stdlib_chetf2_rk +#endif +#:if WITH_QP + module procedure stdlib_whetf2_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + end subroutine zhetf2_rk +#else + module procedure stdlib_zhetf2_rk +#endif + end interface hetf2_rk + + + + !> HETF2_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + interface hetf2_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine chetf2_rook +#else + module procedure stdlib_chetf2_rook +#endif +#:if WITH_QP + module procedure stdlib_whetf2_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zhetf2_rook +#else + module procedure stdlib_zhetf2_rook +#endif + end interface hetf2_rook + + + + !> HETRD: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + interface hetrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine chetrd +#else + module procedure stdlib_chetrd +#endif +#:if WITH_QP + module procedure stdlib_whetrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zhetrd +#else + module procedure stdlib_zhetrd +#endif + end interface hetrd + + + + !> HETRD_HB2ST: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + interface hetrd_hb2st +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrd_hb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, & + lhous, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: stage1,uplo,vect + integer(ilp), intent(in) :: n,kd,ldab,lhous,lwork + integer(ilp), intent(out) :: info + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: hous(*),work(*) + end subroutine chetrd_hb2st +#else + module procedure stdlib_chetrd_hb2st +#endif +#:if WITH_QP + module procedure stdlib_whetrd_hb2st +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrd_hb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, & + lhous, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: stage1,uplo,vect + integer(ilp), intent(in) :: n,kd,ldab,lhous,lwork + integer(ilp), intent(out) :: info + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: hous(*),work(*) + end subroutine zhetrd_hb2st +#else + module procedure stdlib_zhetrd_hb2st +#endif + end interface hetrd_hb2st + + + + !> HETRD_HE2HB: reduces a complex Hermitian matrix A to complex Hermitian + !> band-diagonal form AB by a unitary similarity transformation: + !> Q**H * A * Q = AB. + interface hetrd_he2hb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrd_he2hb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldab,lwork,n,kd + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: ab(ldab,*),tau(*),work(*) + end subroutine chetrd_he2hb +#else + module procedure stdlib_chetrd_he2hb +#endif +#:if WITH_QP + module procedure stdlib_whetrd_he2hb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrd_he2hb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldab,lwork,n,kd + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: ab(ldab,*),tau(*),work(*) + end subroutine zhetrd_he2hb +#else + module procedure stdlib_zhetrd_he2hb +#endif + end interface hetrd_he2hb + + + + !> HETRF: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface hetrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine chetrf +#else + module procedure stdlib_chetrf +#endif +#:if WITH_QP + module procedure stdlib_whetrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetrf +#else + module procedure stdlib_zhetrf +#endif + end interface hetrf + + + + !> HETRF_AA: computes the factorization of a complex hermitian matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**H*T*U or A = L*T*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a hermitian tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface hetrf_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine chetrf_aa +#else + module procedure stdlib_chetrf_aa +#endif +#:if WITH_QP + module procedure stdlib_whetrf_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetrf_aa +#else + module procedure stdlib_zhetrf_aa +#endif + end interface hetrf_aa + + + + !> HETRF_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + interface hetrf_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*),work(*) + end subroutine chetrf_rk +#else + module procedure stdlib_chetrf_rk +#endif +#:if WITH_QP + module procedure stdlib_whetrf_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*),work(*) + end subroutine zhetrf_rk +#else + module procedure stdlib_zhetrf_rk +#endif + end interface hetrf_rk + + + + !> HETRF_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface hetrf_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine chetrf_rook +#else + module procedure stdlib_chetrf_rook +#endif +#:if WITH_QP + module procedure stdlib_whetrf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetrf_rook +#else + module procedure stdlib_zhetrf_rook +#endif + end interface hetrf_rook + + + + !> HETRI: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> CHETRF. + interface hetri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine chetri +#else + module procedure stdlib_chetri +#endif +#:if WITH_QP + module procedure stdlib_whetri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetri +#else + module procedure stdlib_zhetri +#endif + end interface hetri + + + + !> HETRI_ROOK: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> CHETRF_ROOK. + interface hetri_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine chetri_rook +#else + module procedure stdlib_chetri_rook +#endif +#:if WITH_QP + module procedure stdlib_whetri_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetri_rook +#else + module procedure stdlib_zhetri_rook +#endif + end interface hetri_rook + + + + !> HETRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF. + interface hetrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine chetrs +#else + module procedure stdlib_chetrs +#endif +#:if WITH_QP + module procedure stdlib_whetrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zhetrs +#else + module procedure stdlib_zhetrs +#endif + end interface hetrs + + + + !> HETRS2: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF and converted by CSYCONV. + interface hetrs2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chetrs2 +#else + module procedure stdlib_chetrs2 +#endif +#:if WITH_QP + module procedure stdlib_whetrs2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetrs2 +#else + module procedure stdlib_zhetrs2 +#endif + end interface hetrs2 + + + + !> HETRS_3: solves a system of linear equations A * X = B with a complex + !> Hermitian matrix A using the factorization computed + !> by CHETRF_RK or CHETRF_BK: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + interface hetrs_3 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*),e(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine chetrs_3 +#else + module procedure stdlib_chetrs_3 +#endif +#:if WITH_QP + module procedure stdlib_whetrs_3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*),e(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zhetrs_3 +#else + module procedure stdlib_zhetrs_3 +#endif + end interface hetrs_3 + + + + !> HETRS_AA: solves a system of linear equations A*X = B with a complex + !> hermitian matrix A using the factorization A = U**H*T*U or + !> A = L*T*L**H computed by CHETRF_AA. + interface hetrs_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine chetrs_aa +#else + module procedure stdlib_chetrs_aa +#endif +#:if WITH_QP + module procedure stdlib_whetrs_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zhetrs_aa +#else + module procedure stdlib_zhetrs_aa +#endif + end interface hetrs_aa + + + + !> HETRS_ROOK: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + interface hetrs_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chetrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine chetrs_rook +#else + module procedure stdlib_chetrs_rook +#endif +#:if WITH_QP + module procedure stdlib_whetrs_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhetrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zhetrs_rook +#else + module procedure stdlib_zhetrs_rook +#endif + end interface hetrs_rook + + + + !> Level 3 BLAS like routine for C in RFP Format. + !> HFRK: performs one of the Hermitian rank--k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n Hermitian + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + interface hfrk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: k,lda,n + character, intent(in) :: trans,transr,uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: c(*) + end subroutine chfrk +#else + module procedure stdlib_chfrk +#endif +#:if WITH_QP + module procedure stdlib_whfrk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: k,lda,n + character, intent(in) :: trans,transr,uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: c(*) + end subroutine zhfrk +#else + module procedure stdlib_zhfrk +#endif + end interface hfrk + + + + !> HGEQZ: computes the eigenvalues of a complex matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the single-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a complex matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by CGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices and S and P are upper triangular. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from CGGHRD that reduced + !> the matrix pair (A,B) to generalized Hessenberg form, then the output + !> matrices Q1*Q and Z1*Z are the unitary factors from the generalized + !> Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) + !> (equivalently, of (A,B)) are computed as a pair of complex values + !> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an + !> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> The values of alpha and beta for the i-th eigenvalue can be read + !> directly from the generalized Schur form: alpha = S(i,i), + !> beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + interface hgeqz +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alpha, beta, q, & + ldq, z, ldz, work, lwork,rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz,job + integer(ilp), intent(in) :: ihi,ilo,ldh,ldq,ldt,ldz,lwork,n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: alpha(*),beta(*),work(*) + complex(sp), intent(inout) :: h(ldh,*),q(ldq,*),t(ldt,*),z(ldz,*) + end subroutine chgeqz +#else + module procedure stdlib_chgeqz +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dhgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alphar, alphai, & + beta, q, ldq, z, ldz, work,lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz,job + integer(ilp), intent(in) :: ihi,ilo,ldh,ldq,ldt,ldz,lwork,n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: alphai(*),alphar(*),beta(*),work(*) + real(dp), intent(inout) :: h(ldh,*),q(ldq,*),t(ldt,*),z(ldz,*) + end subroutine dhgeqz +#else + module procedure stdlib_dhgeqz +#endif +#:if WITH_QP + module procedure stdlib_qhgeqz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine shgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alphar, alphai, & + beta, q, ldq, z, ldz, work,lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz,job + integer(ilp), intent(in) :: ihi,ilo,ldh,ldq,ldt,ldz,lwork,n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: alphai(*),alphar(*),beta(*),work(*) + real(sp), intent(inout) :: h(ldh,*),q(ldq,*),t(ldt,*),z(ldz,*) + end subroutine shgeqz +#else + module procedure stdlib_shgeqz +#endif +#:if WITH_QP + module procedure stdlib_whgeqz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alpha, beta, q, & + ldq, z, ldz, work, lwork,rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,compz,job + integer(ilp), intent(in) :: ihi,ilo,ldh,ldq,ldt,ldz,lwork,n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: alpha(*),beta(*),work(*) + complex(dp), intent(inout) :: h(ldh,*),q(ldq,*),t(ldt,*),z(ldz,*) + end subroutine zhgeqz +#else + module procedure stdlib_zhgeqz +#endif + end interface hgeqz + + + + !> HPCON: estimates the reciprocal of the condition number of a complex + !> Hermitian packed matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface hpcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chpcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine chpcon +#else + module procedure stdlib_chpcon +#endif +#:if WITH_QP + module procedure stdlib_whpcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhpcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine zhpcon +#else + module procedure stdlib_zhpcon +#endif + end interface hpcon + + + + !> HPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix in packed storage. + interface hpev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chpev( jobz, uplo, n, ap, w, z, ldz, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chpev +#else + module procedure stdlib_chpev +#endif +#:if WITH_QP + module procedure stdlib_whpev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhpev( jobz, uplo, n, ap, w, z, ldz, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhpev +#else + module procedure stdlib_zhpev +#endif + end interface hpev + + + + !> HPEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface hpevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chpevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,rwork, lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chpevd +#else + module procedure stdlib_chpevd +#endif +#:if WITH_QP + module procedure stdlib_whpevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhpevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,rwork, lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhpevd +#else + module procedure stdlib_zhpevd +#endif + end interface hpevd + + + + !> HPGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by CPPTRF. + interface hpgst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chpgst( itype, uplo, n, ap, bp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,n + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: bp(*) + end subroutine chpgst +#else + module procedure stdlib_chpgst +#endif +#:if WITH_QP + module procedure stdlib_whpgst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhpgst( itype, uplo, n, ap, bp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,n + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: bp(*) + end subroutine zhpgst +#else + module procedure stdlib_zhpgst +#endif + end interface hpgst + + + + !> HPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian, stored in packed format, + !> and B is also positive definite. + interface hpgv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chpgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,ldz,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ap(*),bp(*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chpgv +#else + module procedure stdlib_chpgv +#endif +#:if WITH_QP + module procedure stdlib_whpgv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhpgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,ldz,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ap(*),bp(*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhpgv +#else + module procedure stdlib_zhpgv +#endif + end interface hpgv + + + + !> HPGVD: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface hpgvd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chpgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,ldz,liwork,lrwork,lwork,n + real(sp), intent(out) :: rwork(*),w(*) + complex(sp), intent(inout) :: ap(*),bp(*) + complex(sp), intent(out) :: work(*),z(ldz,*) + end subroutine chpgvd +#else + module procedure stdlib_chpgvd +#endif +#:if WITH_QP + module procedure stdlib_whpgvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhpgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, rwork, & + lrwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,ldz,liwork,lrwork,lwork,n + real(dp), intent(out) :: rwork(*),w(*) + complex(dp), intent(inout) :: ap(*),bp(*) + complex(dp), intent(out) :: work(*),z(ldz,*) + end subroutine zhpgvd +#else + module procedure stdlib_zhpgvd +#endif + end interface hpgvd + + + + !> HPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + interface hprfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine chprfs +#else + module procedure stdlib_chprfs +#endif +#:if WITH_QP + module procedure stdlib_whprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zhprfs +#else + module procedure stdlib_zhprfs +#endif + end interface hprfs + + + + !> HPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is Hermitian and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + interface hpsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chpsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(inout) :: ap(*),b(ldb,*) + end subroutine chpsv +#else + module procedure stdlib_chpsv +#endif +#:if WITH_QP + module procedure stdlib_whpsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhpsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(inout) :: ap(*),b(ldb,*) + end subroutine zhpsv +#else + module procedure stdlib_zhpsv +#endif + end interface hpsv + + + + !> HPTRD: reduces a complex Hermitian matrix A stored in packed form to + !> real symmetric tridiagonal form T by a unitary similarity + !> transformation: Q**H * A * Q = T. + interface hptrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chptrd( uplo, n, ap, d, e, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: tau(*) + end subroutine chptrd +#else + module procedure stdlib_chptrd +#endif +#:if WITH_QP + module procedure stdlib_whptrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhptrd( uplo, n, ap, d, e, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: tau(*) + end subroutine zhptrd +#else + module procedure stdlib_zhptrd +#endif + end interface hptrd + + + + !> HPTRF: computes the factorization of a complex Hermitian packed + !> matrix A using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + interface hptrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: ap(*) + end subroutine chptrf +#else + module procedure stdlib_chptrf +#endif +#:if WITH_QP + module procedure stdlib_whptrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: ap(*) + end subroutine zhptrf +#else + module procedure stdlib_zhptrf +#endif + end interface hptrf + + + + !> HPTRI: computes the inverse of a complex Hermitian indefinite matrix + !> A in packed storage using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHPTRF. + interface hptri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine chptri +#else + module procedure stdlib_chptri +#endif +#:if WITH_QP + module procedure stdlib_whptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine zhptri +#else + module procedure stdlib_zhptri +#endif + end interface hptri + + + + !> HPTRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A stored in packed format using the factorization + !> A = U*D*U**H or A = L*D*L**H computed by CHPTRF. + interface hptrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine chptrs +#else + module procedure stdlib_chptrs +#endif +#:if WITH_QP + module procedure stdlib_whptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zhptrs +#else + module procedure stdlib_zhptrs +#endif + end interface hptrs + + + + !> HSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a complex upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + interface hsein +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine chsein( side, eigsrc, initv, select, n, h, ldh, w, vl,ldvl, vr, ldvr, & + mm, m, work, rwork, ifaill,ifailr, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: eigsrc,initv,side + integer(ilp), intent(out) :: info,m,ifaill(*),ifailr(*) + integer(ilp), intent(in) :: ldh,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: h(ldh,*) + complex(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*),w(*) + complex(sp), intent(out) :: work(*) + end subroutine chsein +#else + module procedure stdlib_chsein +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dhsein( side, eigsrc, initv, select, n, h, ldh, wr, wi,vl, ldvl, vr, & + ldvr, mm, m, work, ifaill,ifailr, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: eigsrc,initv,side + integer(ilp), intent(out) :: info,m,ifaill(*),ifailr(*) + integer(ilp), intent(in) :: ldh,ldvl,ldvr,mm,n + logical(lk), intent(inout) :: select(*) + real(dp), intent(in) :: h(ldh,*),wi(*) + real(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*),wr(*) + real(dp), intent(out) :: work(*) + end subroutine dhsein +#else + module procedure stdlib_dhsein +#endif +#:if WITH_QP + module procedure stdlib_qhsein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine shsein( side, eigsrc, initv, select, n, h, ldh, wr, wi,vl, ldvl, vr, & + ldvr, mm, m, work, ifaill,ifailr, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: eigsrc,initv,side + integer(ilp), intent(out) :: info,m,ifaill(*),ifailr(*) + integer(ilp), intent(in) :: ldh,ldvl,ldvr,mm,n + logical(lk), intent(inout) :: select(*) + real(sp), intent(in) :: h(ldh,*),wi(*) + real(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*),wr(*) + real(sp), intent(out) :: work(*) + end subroutine shsein +#else + module procedure stdlib_shsein +#endif +#:if WITH_QP + module procedure stdlib_whsein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zhsein( side, eigsrc, initv, select, n, h, ldh, w, vl,ldvl, vr, ldvr, & + mm, m, work, rwork, ifaill,ifailr, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: eigsrc,initv,side + integer(ilp), intent(out) :: info,m,ifaill(*),ifailr(*) + integer(ilp), intent(in) :: ldh,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: h(ldh,*) + complex(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*),w(*) + complex(dp), intent(out) :: work(*) + end subroutine zhsein +#else + module procedure stdlib_zhsein +#endif + end interface hsein + + + + !> HSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + interface hseqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine chseqr( job, compz, n, ilo, ihi, h, ldh, w, z, ldz,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + character, intent(in) :: compz,job + complex(sp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(sp), intent(out) :: w(*),work(*) + end subroutine chseqr +#else + module procedure stdlib_chseqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dhseqr( job, compz, n, ilo, ihi, h, ldh, wr, wi, z,ldz, work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + character, intent(in) :: compz,job + real(dp), intent(inout) :: h(ldh,*),z(ldz,*) + real(dp), intent(out) :: wi(*),work(*),wr(*) + end subroutine dhseqr +#else + module procedure stdlib_dhseqr +#endif +#:if WITH_QP + module procedure stdlib_qhseqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine shseqr( job, compz, n, ilo, ihi, h, ldh, wr, wi, z,ldz, work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + character, intent(in) :: compz,job + real(sp), intent(inout) :: h(ldh,*),z(ldz,*) + real(sp), intent(out) :: wi(*),work(*),wr(*) + end subroutine shseqr +#else + module procedure stdlib_shseqr +#endif +#:if WITH_QP + module procedure stdlib_whseqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zhseqr( job, compz, n, ilo, ihi, h, ldh, w, z, ldz,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + character, intent(in) :: compz,job + complex(dp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(dp), intent(out) :: w(*),work(*) + end subroutine zhseqr +#else + module procedure stdlib_zhseqr +#endif + end interface hseqr + + + + !> ISNAN: returns .TRUE. if its argument is NaN, and .FALSE. + !> otherwise. To be replaced by the Fortran 2003 intrinsic in the + !> future. + interface isnan +#ifdef STDLIB_EXTERNAL_LAPACK + pure logical(lk) function disnan( din ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: din + end function disnan +#else + module procedure stdlib_disnan +#endif +#:if WITH_QP + module procedure stdlib_qisnan +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure logical(lk) function sisnan( sin ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: sin + end function sisnan +#else + module procedure stdlib_sisnan +#endif + end interface isnan + + + + !> LA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + interface la_gbamv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,ldab,m,n,kl,ku,trans + complex(sp), intent(in) :: ab(ldab,*),x(*) + real(sp), intent(inout) :: y(*) + end subroutine cla_gbamv +#else + module procedure stdlib_cla_gbamv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,ab(ldab,*),x(*) + integer(ilp), intent(in) :: incx,incy,ldab,m,n,kl,ku,trans + real(dp), intent(inout) :: y(*) + end subroutine dla_gbamv +#else + module procedure stdlib_dla_gbamv +#endif +#:if WITH_QP + module procedure stdlib_qla_gbamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,ab(ldab,*),x(*) + integer(ilp), intent(in) :: incx,incy,ldab,m,n,kl,ku,trans + real(sp), intent(inout) :: y(*) + end subroutine sla_gbamv +#else + module procedure stdlib_sla_gbamv +#endif +#:if WITH_QP + module procedure stdlib_wla_gbamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,ldab,m,n,kl,ku,trans + complex(dp), intent(in) :: ab(ldab,*),x(*) + real(dp), intent(inout) :: y(*) + end subroutine zla_gbamv +#else + module procedure stdlib_zla_gbamv +#endif + end interface la_gbamv + + + + !> LA_GBRCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + interface la_gbrcond +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_gbrcond( trans, n, kl, ku, ab, ldab,afb, ldafb, ipiv, cmode, & + c,info, work, iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: n,ldab,ldafb,kl,ku,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: ab(ldab,*),afb(ldafb,*),c(*) + real(dp), intent(out) :: work(*) + end function dla_gbrcond +#else + module procedure stdlib_dla_gbrcond +#endif +#:if WITH_QP + module procedure stdlib_qla_gbrcond +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_gbrcond( trans, n, kl, ku, ab, ldab, afb, ldafb,ipiv, cmode, & + c, info, work, iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: n,ldab,ldafb,kl,ku,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: ab(ldab,*),afb(ldafb,*),c(*) + real(sp), intent(out) :: work(*) + end function sla_gbrcond +#else + module procedure stdlib_sla_gbrcond +#endif + end interface la_gbrcond + + + + !> LA_GBRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + interface la_gbrcond_c +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_gbrcond_c( trans, n, kl, ku, ab, ldab, afb,ldafb, ipiv, c, & + capply, info, work,rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,kl,ku,ldab,ldafb,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: ab(ldab,*),afb(ldafb,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + end function cla_gbrcond_c +#else + module procedure stdlib_cla_gbrcond_c +#endif +#:if WITH_QP + module procedure stdlib_wla_gbrcond_c +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_gbrcond_c( trans, n, kl, ku, ab,ldab, afb, ldafb, ipiv,c, & + capply, info, work,rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,kl,ku,ldab,ldafb,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: ab(ldab,*),afb(ldafb,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + end function zla_gbrcond_c +#else + module procedure stdlib_zla_gbrcond_c +#endif + end interface la_gbrcond_c + + + + !> LA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + interface la_gbrpvgrw +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function cla_gbrpvgrw( n, kl, ku, ncols, ab, ldab, afb,ldafb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,kl,ku,ncols,ldab,ldafb + complex(sp), intent(in) :: ab(ldab,*),afb(ldafb,*) + end function cla_gbrpvgrw +#else + module procedure stdlib_cla_gbrpvgrw +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function dla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,kl,ku,ncols,ldab,ldafb + real(dp), intent(in) :: ab(ldab,*),afb(ldafb,*) + end function dla_gbrpvgrw +#else + module procedure stdlib_dla_gbrpvgrw +#endif +#:if WITH_QP + module procedure stdlib_qla_gbrpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function sla_gbrpvgrw( n, kl, ku, ncols, ab, ldab, afb,ldafb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,kl,ku,ncols,ldab,ldafb + real(sp), intent(in) :: ab(ldab,*),afb(ldafb,*) + end function sla_gbrpvgrw +#else + module procedure stdlib_sla_gbrpvgrw +#endif +#:if WITH_QP + module procedure stdlib_wla_gbrpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function zla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,kl,ku,ncols,ldab,ldafb + complex(dp), intent(in) :: ab(ldab,*),afb(ldafb,*) + end function zla_gbrpvgrw +#else + module procedure stdlib_zla_gbrpvgrw +#endif + end interface la_gbrpvgrw + + + + !> LA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + interface la_geamv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,m,n,trans + complex(sp), intent(in) :: a(lda,*),x(*) + real(sp), intent(inout) :: y(*) + end subroutine cla_geamv +#else + module procedure stdlib_cla_geamv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dla_geamv ( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n,trans + real(dp), intent(inout) :: y(*) + end subroutine dla_geamv +#else + module procedure stdlib_dla_geamv +#endif +#:if WITH_QP + module procedure stdlib_qla_geamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,m,n,trans + real(sp), intent(inout) :: y(*) + end subroutine sla_geamv +#else + module procedure stdlib_sla_geamv +#endif +#:if WITH_QP + module procedure stdlib_wla_geamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,m,n,trans + complex(dp), intent(in) :: a(lda,*),x(*) + real(dp), intent(inout) :: y(*) + end subroutine zla_geamv +#else + module procedure stdlib_zla_geamv +#endif + end interface la_geamv + + + + !> LA_GERCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + interface la_gercond +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_gercond( trans, n, a, lda, af,ldaf, ipiv, cmode, c,info, & + work, iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: n,lda,ldaf,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(dp), intent(out) :: work(*) + end function dla_gercond +#else + module procedure stdlib_dla_gercond +#endif +#:if WITH_QP + module procedure stdlib_qla_gercond +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_gercond( trans, n, a, lda, af, ldaf, ipiv,cmode, c, info, & + work, iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: n,lda,ldaf,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(sp), intent(out) :: work(*) + end function sla_gercond +#else + module procedure stdlib_sla_gercond +#endif + end interface la_gercond + + + + !> LA_GERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + interface la_gercond_c +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_gercond_c( trans, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + end function cla_gercond_c +#else + module procedure stdlib_cla_gercond_c +#endif +#:if WITH_QP + module procedure stdlib_wla_gercond_c +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_gercond_c( trans, n, a, lda, af,ldaf, ipiv, c, capply,info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + end function zla_gercond_c +#else + module procedure stdlib_zla_gercond_c +#endif + end interface la_gercond_c + + + + !> LA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + interface la_gerpvgrw +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function cla_gerpvgrw( n, ncols, a, lda, af, ldaf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,ncols,lda,ldaf + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + end function cla_gerpvgrw +#else + module procedure stdlib_cla_gerpvgrw +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function dla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,ncols,lda,ldaf + real(dp), intent(in) :: a(lda,*),af(ldaf,*) + end function dla_gerpvgrw +#else + module procedure stdlib_dla_gerpvgrw +#endif +#:if WITH_QP + module procedure stdlib_qla_gerpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function sla_gerpvgrw( n, ncols, a, lda, af, ldaf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,ncols,lda,ldaf + real(sp), intent(in) :: a(lda,*),af(ldaf,*) + end function sla_gerpvgrw +#else + module procedure stdlib_sla_gerpvgrw +#endif +#:if WITH_QP + module procedure stdlib_wla_gerpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function zla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,ncols,lda,ldaf + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + end function zla_gerpvgrw +#else + module procedure stdlib_zla_gerpvgrw +#endif + end interface la_gerpvgrw + + + + !> CLA_SYAMV performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + interface la_heamv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cla_heamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + complex(sp), intent(in) :: a(lda,*),x(*) + real(sp), intent(inout) :: y(*) + end subroutine cla_heamv +#else + module procedure stdlib_cla_heamv +#endif +#:if WITH_QP + module procedure stdlib_wla_heamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zla_heamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + complex(dp), intent(in) :: a(lda,*),x(*) + real(dp), intent(inout) :: y(*) + end subroutine zla_heamv +#else + module procedure stdlib_zla_heamv +#endif + end interface la_heamv + + + + !> LA_HERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + interface la_hercond_c +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_hercond_c( uplo, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + end function cla_hercond_c +#else + module procedure stdlib_cla_hercond_c +#endif +#:if WITH_QP + module procedure stdlib_wla_hercond_c +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_hercond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + end function zla_hercond_c +#else + module procedure stdlib_zla_hercond_c +#endif + end interface la_hercond_c + + + + !> LA_HERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + interface la_herpvgrw +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_herpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + real(sp), intent(out) :: work(*) + end function cla_herpvgrw +#else + module procedure stdlib_cla_herpvgrw +#endif +#:if WITH_QP + module procedure stdlib_wla_herpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_herpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + real(dp), intent(out) :: work(*) + end function zla_herpvgrw +#else + module procedure stdlib_zla_herpvgrw +#endif + end interface la_herpvgrw + + + + !> LA_LIN_BERR: computes componentwise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the componentwise absolute value of the matrix + !> or vector Z. + interface la_lin_berr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cla_lin_berr( n, nz, nrhs, res, ayb, berr ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,nz,nrhs + real(sp), intent(in) :: ayb(n,nrhs) + real(sp), intent(out) :: berr(nrhs) + complex(sp), intent(in) :: res(n,nrhs) + end subroutine cla_lin_berr +#else + module procedure stdlib_cla_lin_berr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dla_lin_berr ( n, nz, nrhs, res, ayb, berr ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,nz,nrhs + real(dp), intent(in) :: ayb(n,nrhs),res(n,nrhs) + real(dp), intent(out) :: berr(nrhs) + end subroutine dla_lin_berr +#else + module procedure stdlib_dla_lin_berr +#endif +#:if WITH_QP + module procedure stdlib_qla_lin_berr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sla_lin_berr( n, nz, nrhs, res, ayb, berr ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,nz,nrhs + real(sp), intent(in) :: ayb(n,nrhs),res(n,nrhs) + real(sp), intent(out) :: berr(nrhs) + end subroutine sla_lin_berr +#else + module procedure stdlib_sla_lin_berr +#endif +#:if WITH_QP + module procedure stdlib_wla_lin_berr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zla_lin_berr( n, nz, nrhs, res, ayb, berr ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,nz,nrhs + real(dp), intent(in) :: ayb(n,nrhs) + real(dp), intent(out) :: berr(nrhs) + complex(dp), intent(in) :: res(n,nrhs) + end subroutine zla_lin_berr +#else + module procedure stdlib_zla_lin_berr +#endif + end interface la_lin_berr + + + + !> LA_PORCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + interface la_porcond +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_porcond( uplo, n, a, lda, af, ldaf,cmode, c, info, work,& + iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,ldaf,cmode + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(dp), intent(out) :: work(*) + end function dla_porcond +#else + module procedure stdlib_dla_porcond +#endif +#:if WITH_QP + module procedure stdlib_qla_porcond +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_porcond( uplo, n, a, lda, af, ldaf, cmode, c,info, work, & + iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,ldaf,cmode + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(sp), intent(out) :: work(*) + end function sla_porcond +#else + module procedure stdlib_sla_porcond +#endif + end interface la_porcond + + + + !> LA_PORCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector + interface la_porcond_c +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_porcond_c( uplo, n, a, lda, af, ldaf, c, capply,info, work, & + rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + end function cla_porcond_c +#else + module procedure stdlib_cla_porcond_c +#endif +#:if WITH_QP + module procedure stdlib_wla_porcond_c +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_porcond_c( uplo, n, a, lda, af,ldaf, c, capply, info,work, & + rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + end function zla_porcond_c +#else + module procedure stdlib_zla_porcond_c +#endif + end interface la_porcond_c + + + + !> LA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + interface la_porpvgrw +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_porpvgrw( uplo, ncols, a, lda, af, ldaf, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols,lda,ldaf + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + real(sp), intent(out) :: work(*) + end function cla_porpvgrw +#else + module procedure stdlib_cla_porpvgrw +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols,lda,ldaf + real(dp), intent(in) :: a(lda,*),af(ldaf,*) + real(dp), intent(out) :: work(*) + end function dla_porpvgrw +#else + module procedure stdlib_dla_porpvgrw +#endif +#:if WITH_QP + module procedure stdlib_qla_porpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_porpvgrw( uplo, ncols, a, lda, af, ldaf, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols,lda,ldaf + real(sp), intent(in) :: a(lda,*),af(ldaf,*) + real(sp), intent(out) :: work(*) + end function sla_porpvgrw +#else + module procedure stdlib_sla_porpvgrw +#endif +#:if WITH_QP + module procedure stdlib_wla_porpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols,lda,ldaf + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + real(dp), intent(out) :: work(*) + end function zla_porpvgrw +#else + module procedure stdlib_zla_porpvgrw +#endif + end interface la_porpvgrw + + + + !> LA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + interface la_syamv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + complex(sp), intent(in) :: a(lda,*),x(*) + real(sp), intent(inout) :: y(*) + end subroutine cla_syamv +#else + module procedure stdlib_cla_syamv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + real(dp), intent(inout) :: y(*) + end subroutine dla_syamv +#else + module procedure stdlib_dla_syamv +#endif +#:if WITH_QP + module procedure stdlib_qla_syamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + real(sp), intent(inout) :: y(*) + end subroutine sla_syamv +#else + module procedure stdlib_sla_syamv +#endif +#:if WITH_QP + module procedure stdlib_wla_syamv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta + integer(ilp), intent(in) :: incx,incy,lda,n,uplo + complex(dp), intent(in) :: a(lda,*),x(*) + real(dp), intent(inout) :: y(*) + end subroutine zla_syamv +#else + module procedure stdlib_zla_syamv +#endif + end interface la_syamv + + + + !> LA_SYRCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + interface la_syrcond +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_syrcond( uplo, n, a, lda, af, ldaf,ipiv, cmode, c, info, & + work,iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,ldaf,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(dp), intent(out) :: work(*) + end function dla_syrcond +#else + module procedure stdlib_dla_syrcond +#endif +#:if WITH_QP + module procedure stdlib_qla_syrcond +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_syrcond( uplo, n, a, lda, af, ldaf, ipiv, cmode,c, info, & + work, iwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,ldaf,cmode,ipiv(*) + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*),c(*) + real(sp), intent(out) :: work(*) + end function sla_syrcond +#else + module procedure stdlib_sla_syrcond +#endif + end interface la_syrcond + + + + !> LA_SYRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + interface la_syrcond_c +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_syrcond_c( uplo, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + end function cla_syrcond_c +#else + module procedure stdlib_cla_syrcond_c +#endif +#:if WITH_QP + module procedure stdlib_wla_syrcond_c +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_syrcond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, & + work, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n,lda,ldaf,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + end function zla_syrcond_c +#else + module procedure stdlib_zla_syrcond_c +#endif + end interface la_syrcond_c + + + + !> LA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + interface la_syrpvgrw +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function cla_syrpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*) + real(sp), intent(out) :: work(*) + end function cla_syrpvgrw +#else + module procedure stdlib_cla_syrpvgrw +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*) + real(dp), intent(out) :: work(*) + end function dla_syrpvgrw +#else + module procedure stdlib_dla_syrpvgrw +#endif +#:if WITH_QP + module procedure stdlib_qla_syrpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function sla_syrpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*) + real(sp), intent(out) :: work(*) + end function sla_syrpvgrw +#else + module procedure stdlib_sla_syrpvgrw +#endif +#:if WITH_QP + module procedure stdlib_wla_syrpvgrw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,info,lda,ldaf,ipiv(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*) + real(dp), intent(out) :: work(*) + end function zla_syrpvgrw +#else + module procedure stdlib_zla_syrpvgrw +#endif + end interface la_syrpvgrw + + + + !> LA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + interface la_wwaddw +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cla_wwaddw( n, x, y, w ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: x(*),y(*) + complex(sp), intent(in) :: w(*) + end subroutine cla_wwaddw +#else + module procedure stdlib_cla_wwaddw +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dla_wwaddw( n, x, y, w ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: x(*),y(*) + real(dp), intent(in) :: w(*) + end subroutine dla_wwaddw +#else + module procedure stdlib_dla_wwaddw +#endif +#:if WITH_QP + module procedure stdlib_qla_wwaddw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sla_wwaddw( n, x, y, w ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: x(*),y(*) + real(sp), intent(in) :: w(*) + end subroutine sla_wwaddw +#else + module procedure stdlib_sla_wwaddw +#endif +#:if WITH_QP + module procedure stdlib_wla_wwaddw +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zla_wwaddw( n, x, y, w ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: x(*),y(*) + complex(dp), intent(in) :: w(*) + end subroutine zla_wwaddw +#else + module procedure stdlib_zla_wwaddw +#endif + end interface la_wwaddw + + + + !> LABAD: takes as input the values computed by DLAMCH for underflow and + !> overflow, and returns the square root of each of these values if the + !> log of LARGE is sufficiently large. This subroutine is intended to + !> identify machines with a large exponent range, such as the Crays, and + !> redefine the underflow and overflow limits to be the square roots of + !> the values computed by DLAMCH. This subroutine is needed because + !> DLAMCH does not compensate for poor arithmetic in the upper half of + !> the exponent range, as is found on a Cray. + interface labad +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlabad( small, large ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(inout) :: large,small + end subroutine dlabad +#else + module procedure stdlib_dlabad +#endif +#:if WITH_QP + module procedure stdlib_qlabad +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slabad( small, large ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(inout) :: large,small + end subroutine slabad +#else + module procedure stdlib_slabad +#endif + end interface labad + + + + !> LABRD: reduces the first NB rows and columns of a complex general + !> m by n matrix A to upper or lower real bidiagonal form by a unitary + !> transformation Q**H * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by CGEBRD + interface labrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldx,ldy,m,n,nb + real(sp), intent(out) :: d(*),e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: taup(*),tauq(*),x(ldx,*),y(ldy,*) + end subroutine clabrd +#else + module procedure stdlib_clabrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldx,ldy,m,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*),e(*),taup(*),tauq(*),x(ldx,*),y(ldy,*) + end subroutine dlabrd +#else + module procedure stdlib_dlabrd +#endif +#:if WITH_QP + module procedure stdlib_qlabrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldx,ldy,m,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*),e(*),taup(*),tauq(*),x(ldx,*),y(ldy,*) + end subroutine slabrd +#else + module procedure stdlib_slabrd +#endif +#:if WITH_QP + module procedure stdlib_wlabrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldx,ldy,m,n,nb + real(dp), intent(out) :: d(*),e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: taup(*),tauq(*),x(ldx,*),y(ldy,*) + end subroutine zlabrd +#else + module procedure stdlib_zlabrd +#endif + end interface labrd + + + + !> LACGV: conjugates a complex vector of length N. + interface lacgv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clacgv( n, x, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(sp), intent(inout) :: x(*) + end subroutine clacgv +#else + module procedure stdlib_clacgv +#endif +#:if WITH_QP + module procedure stdlib_wlacgv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlacgv( n, x, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(dp), intent(inout) :: x(*) + end subroutine zlacgv +#else + module procedure stdlib_zlacgv +#endif + end interface lacgv + + + + !> LACON: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + interface lacon +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine clacon( n, v, x, est, kase ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est + complex(sp), intent(out) :: v(n) + complex(sp), intent(inout) :: x(n) + end subroutine clacon +#else + module procedure stdlib_clacon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlacon( n, v, x, isgn, est, kase ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est,x(*) + integer(ilp), intent(out) :: isgn(*) + real(dp), intent(out) :: v(*) + end subroutine dlacon +#else + module procedure stdlib_dlacon +#endif +#:if WITH_QP + module procedure stdlib_qlacon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slacon( n, v, x, isgn, est, kase ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est,x(*) + integer(ilp), intent(out) :: isgn(*) + real(sp), intent(out) :: v(*) + end subroutine slacon +#else + module procedure stdlib_slacon +#endif +#:if WITH_QP + module procedure stdlib_wlacon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zlacon( n, v, x, est, kase ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est + complex(dp), intent(out) :: v(n) + complex(dp), intent(inout) :: x(n) + end subroutine zlacon +#else + module procedure stdlib_zlacon +#endif + end interface lacon + + + + !> LACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + interface lacpy +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clacpy( uplo, m, n, a, lda, b, ldb ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldb,m,n + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: b(ldb,*) + end subroutine clacpy +#else + module procedure stdlib_clacpy +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlacpy( uplo, m, n, a, lda, b, ldb ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldb,m,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: b(ldb,*) + end subroutine dlacpy +#else + module procedure stdlib_dlacpy +#endif +#:if WITH_QP + module procedure stdlib_qlacpy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slacpy( uplo, m, n, a, lda, b, ldb ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldb,m,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: b(ldb,*) + end subroutine slacpy +#else + module procedure stdlib_slacpy +#endif +#:if WITH_QP + module procedure stdlib_wlacpy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlacpy( uplo, m, n, a, lda, b, ldb ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldb,m,n + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: b(ldb,*) + end subroutine zlacpy +#else + module procedure stdlib_zlacpy +#endif + end interface lacpy + + + + !> LACRM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by N and complex; B is N by N and real; + !> C is M by N and complex. + interface lacrm +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clacrm( m, n, a, lda, b, ldb, c, ldc, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + real(sp), intent(in) :: b(ldb,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: c(ldc,*) + end subroutine clacrm +#else + module procedure stdlib_clacrm +#endif +#:if WITH_QP + module procedure stdlib_wlacrm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlacrm( m, n, a, lda, b, ldb, c, ldc, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + real(dp), intent(in) :: b(ldb,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: c(ldc,*) + end subroutine zlacrm +#else + module procedure stdlib_zlacrm +#endif + end interface lacrm + + + + !> LACRT: performs the operation + !> ( c s )( x ) ==> ( x ) + !> ( -s c )( y ) ( y ) + !> where c and s are complex and the vectors x and y are complex. + interface lacrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clacrt( n, cx, incx, cy, incy, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(in) :: c,s + complex(sp), intent(inout) :: cx(*),cy(*) + end subroutine clacrt +#else + module procedure stdlib_clacrt +#endif +#:if WITH_QP + module procedure stdlib_wlacrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlacrt( n, cx, incx, cy, incy, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(in) :: c,s + complex(dp), intent(inout) :: cx(*),cy(*) + end subroutine zlacrt +#else + module procedure stdlib_zlacrt +#endif + end interface lacrt + + + + !> LADIV_F: := X / Y, where X and Y are complex. The computation of X / Y + !> will not overflow on an intermediary step unless the results + !> overflows. + interface ladiv_f +#ifdef STDLIB_EXTERNAL_LAPACK + pure complex(sp) function cladiv( x, y ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: x,y + end function cladiv +#else + module procedure stdlib_cladiv +#endif +#:if WITH_QP + module procedure stdlib_wladiv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure complex(dp) function zladiv( x, y ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: x,y + end function zladiv +#else + module procedure stdlib_zladiv +#endif + end interface ladiv_f + + + + !> LADIV_S: performs complex division in real arithmetic + !> a + i*b + !> p + i*q = --------- + !> c + i*d + !> The algorithm is due to Michael Baudin and Robert L. Smith + !> and can be found in the paper + !> "A Robust Complex Division in Scilab" + interface ladiv_s +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dladiv( a, b, c, d, p, q ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: a,b,c,d + real(dp), intent(out) :: p,q + end subroutine dladiv +#else + module procedure stdlib_dladiv +#endif +#:if WITH_QP + module procedure stdlib_qladiv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sladiv( a, b, c, d, p, q ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: a,b,c,d + real(sp), intent(out) :: p,q + end subroutine sladiv +#else + module procedure stdlib_sladiv +#endif + end interface ladiv_s + + + + interface ladiv1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dladiv1( a, b, c, d, p, q ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(inout) :: a + real(dp), intent(in) :: b,c,d + real(dp), intent(out) :: p,q + end subroutine dladiv1 +#else + module procedure stdlib_dladiv1 +#endif +#:if WITH_QP + module procedure stdlib_qladiv1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sladiv1( a, b, c, d, p, q ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(inout) :: a + real(sp), intent(in) :: b,c,d + real(sp), intent(out) :: p,q + end subroutine sladiv1 +#else + module procedure stdlib_sladiv1 +#endif + end interface ladiv1 + + + + interface ladiv2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function dladiv2( a, b, c, d, r, t ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: a,b,c,d,r,t + end function dladiv2 +#else + module procedure stdlib_dladiv2 +#endif +#:if WITH_QP + module procedure stdlib_qladiv2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function sladiv2( a, b, c, d, r, t ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: a,b,c,d,r,t + end function sladiv2 +#else + module procedure stdlib_sladiv2 +#endif + end interface ladiv2 + + + + !> LAEBZ: contains the iteration loops which compute and use the + !> function N(w), which is the count of eigenvalues of a symmetric + !> tridiagonal matrix T less than or equal to its argument w. It + !> performs a choice of two types of loops: + !> IJOB=1, followed by + !> IJOB=2: It takes as input a list of intervals and returns a list of + !> sufficiently small intervals whose union contains the same + !> eigenvalues as the union of the original intervals. + !> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. + !> The output interval (AB(j,1),AB(j,2)] will contain + !> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. + !> IJOB=3: It performs a binary search in each input interval + !> (AB(j,1),AB(j,2)] for a point w(j) such that + !> N(w(j))=NVAL(j), and uses C(j) as the starting point of + !> the search. If such a w(j) is found, then on output + !> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output + !> (AB(j,1),AB(j,2)] will be a small interval containing the + !> point where N(w) jumps through NVAL(j), unless that point + !> lies outside the initial interval. + !> Note that the intervals are in all cases half-open intervals, + !> i.e., of the form (a,b] , which includes b but not a . + !> To avoid underflow, the matrix should be scaled so that its largest + !> element is no greater than overflow**(1/2) * underflow**(1/4) + !> in absolute value. To assure the most accurate computation + !> of small eigenvalues, the matrix should be scaled to be + !> not much smaller than that, either. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966 + !> Note: the arguments are, in general, *not* checked for unreasonable + !> values. + interface laebz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaebz( ijob, nitmax, n, mmax, minp, nbmin, abstol,reltol, pivmin, & + d, e, e2, nval, ab, c, mout,nab, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,minp,mmax,n,nbmin,nitmax + integer(ilp), intent(out) :: info,mout,iwork(*) + real(dp), intent(in) :: abstol,pivmin,reltol,d(*),e(*),e2(*) + integer(ilp), intent(inout) :: nab(mmax,*),nval(*) + real(dp), intent(inout) :: ab(mmax,*),c(*) + real(dp), intent(out) :: work(*) + end subroutine dlaebz +#else + module procedure stdlib_dlaebz +#endif +#:if WITH_QP + module procedure stdlib_qlaebz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaebz( ijob, nitmax, n, mmax, minp, nbmin, abstol,reltol, pivmin, & + d, e, e2, nval, ab, c, mout,nab, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,minp,mmax,n,nbmin,nitmax + integer(ilp), intent(out) :: info,mout,iwork(*) + real(sp), intent(in) :: abstol,pivmin,reltol,d(*),e(*),e2(*) + integer(ilp), intent(inout) :: nab(mmax,*),nval(*) + real(sp), intent(inout) :: ab(mmax,*),c(*) + real(sp), intent(out) :: work(*) + end subroutine slaebz +#else + module procedure stdlib_slaebz +#endif + end interface laebz + + + + !> Using the divide and conquer method, LAED0: computes all eigenvalues + !> of a symmetric tridiagonal matrix which is one diagonal block of + !> those from reducing a dense or band Hermitian matrix and + !> corresponding eigenvectors of the dense or band matrix. + interface laed0 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claed0( qsiz, n, d, e, q, ldq, qstore, ldqs, rwork,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldq,ldqs,n,qsiz + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: qstore(ldqs,*) + end subroutine claed0 +#else + module procedure stdlib_claed0 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed0( icompq, qsiz, n, d, e, q, ldq, qstore, ldqs,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldq,ldqs,n,qsiz + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(inout) :: d(*),e(*),q(ldq,*) + real(dp), intent(out) :: qstore(ldqs,*),work(*) + end subroutine dlaed0 +#else + module procedure stdlib_dlaed0 +#endif +#:if WITH_QP + module procedure stdlib_qlaed0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed0( icompq, qsiz, n, d, e, q, ldq, qstore, ldqs,work, iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldq,ldqs,n,qsiz + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(inout) :: d(*),e(*),q(ldq,*) + real(sp), intent(out) :: qstore(ldqs,*),work(*) + end subroutine slaed0 +#else + module procedure stdlib_slaed0 +#endif +#:if WITH_QP + module procedure stdlib_wlaed0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaed0( qsiz, n, d, e, q, ldq, qstore, ldqs, rwork,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldq,ldqs,n,qsiz + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: qstore(ldqs,*) + end subroutine zlaed0 +#else + module procedure stdlib_zlaed0 +#endif + end interface laed0 + + + + !> LAED1: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + !> the case in which eigenvalues only or eigenvalues and eigenvectors + !> of a full symmetric matrix (which was reduced to tridiagonal form) + !> are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**T*u, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by DLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + interface laed1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed1( n, d, q, ldq, indxq, rho, cutpnt, work, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,ldq,n + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(inout) :: rho,d(*),q(ldq,*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(out) :: work(*) + end subroutine dlaed1 +#else + module procedure stdlib_dlaed1 +#endif +#:if WITH_QP + module procedure stdlib_qlaed1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed1( n, d, q, ldq, indxq, rho, cutpnt, work, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,ldq,n + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(inout) :: rho,d(*),q(ldq,*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(out) :: work(*) + end subroutine slaed1 +#else + module procedure stdlib_slaed1 +#endif + end interface laed1 + + + + !> This subroutine computes the I-th updated eigenvalue of a symmetric + !> rank-one modification to a diagonal matrix whose elements are + !> given in the array d, and that + !> D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + interface laed4 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed4( n, i, d, z, delta, rho, dlam, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i,n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: dlam,delta(*) + real(dp), intent(in) :: rho,d(*),z(*) + end subroutine dlaed4 +#else + module procedure stdlib_dlaed4 +#endif +#:if WITH_QP + module procedure stdlib_qlaed4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed4( n, i, d, z, delta, rho, dlam, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i,n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: dlam,delta(*) + real(sp), intent(in) :: rho,d(*),z(*) + end subroutine slaed4 +#else + module procedure stdlib_slaed4 +#endif + end interface laed4 + + + + !> This subroutine computes the I-th eigenvalue of a symmetric rank-one + !> modification of a 2-by-2 diagonal matrix + !> diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal elements in the array D are assumed to satisfy + !> D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + interface laed5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed5( i, d, z, delta, rho, dlam ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i + real(dp), intent(out) :: dlam,delta(2) + real(dp), intent(in) :: rho,d(2),z(2) + end subroutine dlaed5 +#else + module procedure stdlib_dlaed5 +#endif +#:if WITH_QP + module procedure stdlib_qlaed5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed5( i, d, z, delta, rho, dlam ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i + real(sp), intent(out) :: dlam,delta(2) + real(sp), intent(in) :: rho,d(2),z(2) + end subroutine slaed5 +#else + module procedure stdlib_slaed5 +#endif + end interface laed5 + + + + !> LAED6: computes the positive or negative root (closest to the origin) + !> of + !> z(1) z(2) z(3) + !> f(x) = rho + --------- + ---------- + --------- + !> d(1)-x d(2)-x d(3)-x + !> It is assumed that + !> if ORGATI = .true. the root is between d(2) and d(3); + !> otherwise it is between d(1) and d(2) + !> This routine will be called by DLAED4 when necessary. In most cases, + !> the root sought is the smallest in magnitude, though it might not be + !> in some extremely rare situations. + interface laed6 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed6( kniter, orgati, rho, d, z, finit, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: orgati + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kniter + real(dp), intent(in) :: finit,rho,d(3),z(3) + real(dp), intent(out) :: tau + end subroutine dlaed6 +#else + module procedure stdlib_dlaed6 +#endif +#:if WITH_QP + module procedure stdlib_qlaed6 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed6( kniter, orgati, rho, d, z, finit, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: orgati + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kniter + real(sp), intent(in) :: finit,rho,d(3),z(3) + real(sp), intent(out) :: tau + end subroutine slaed6 +#else + module procedure stdlib_slaed6 +#endif + end interface laed6 + + + + !> LAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense or banded + !> Hermitian matrix that has been reduced to tridiagonal form. + !> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) + !> where Z = Q**Hu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine SLAED4 (as called by SLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + interface laed7 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claed7( n, cutpnt, qsiz, tlvls, curlvl, curpbm, d, q,ldq, rho, & + indxq, qstore, qptr, prmptr, perm,givptr, givcol, givnum, work, rwork, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,cutpnt,ldq,n,qsiz,tlvls + integer(ilp), intent(out) :: info,indxq(*),iwork(*) + real(sp), intent(inout) :: rho,d(*),givnum(2,*),qstore(*) + integer(ilp), intent(inout) :: givcol(2,*),givptr(*),perm(*),prmptr(*),qptr(*) + + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: work(*) + end subroutine claed7 +#else + module procedure stdlib_claed7 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed7( icompq, n, qsiz, tlvls, curlvl, curpbm, d, q,ldq, indxq, & + rho, cutpnt, qstore, qptr, prmptr,perm, givptr, givcol, givnum, work, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,cutpnt,icompq,ldq,n,qsiz,& + tlvls + integer(ilp), intent(out) :: info,indxq(*),iwork(*) + real(dp), intent(inout) :: rho,d(*),givnum(2,*),q(ldq,*),qstore(*) + integer(ilp), intent(inout) :: givcol(2,*),givptr(*),perm(*),prmptr(*),qptr(*) + + real(dp), intent(out) :: work(*) + end subroutine dlaed7 +#else + module procedure stdlib_dlaed7 +#endif +#:if WITH_QP + module procedure stdlib_qlaed7 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed7( icompq, n, qsiz, tlvls, curlvl, curpbm, d, q,ldq, indxq, & + rho, cutpnt, qstore, qptr, prmptr,perm, givptr, givcol, givnum, work, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,cutpnt,icompq,ldq,n,qsiz,& + tlvls + integer(ilp), intent(out) :: info,indxq(*),iwork(*) + real(sp), intent(inout) :: rho,d(*),givnum(2,*),q(ldq,*),qstore(*) + integer(ilp), intent(inout) :: givcol(2,*),givptr(*),perm(*),prmptr(*),qptr(*) + + real(sp), intent(out) :: work(*) + end subroutine slaed7 +#else + module procedure stdlib_slaed7 +#endif +#:if WITH_QP + module procedure stdlib_wlaed7 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaed7( n, cutpnt, qsiz, tlvls, curlvl, curpbm, d, q,ldq, rho, & + indxq, qstore, qptr, prmptr, perm,givptr, givcol, givnum, work, rwork, iwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,cutpnt,ldq,n,qsiz,tlvls + integer(ilp), intent(out) :: info,indxq(*),iwork(*) + real(dp), intent(inout) :: rho,d(*),givnum(2,*),qstore(*) + integer(ilp), intent(inout) :: givcol(2,*),givptr(*),perm(*),prmptr(*),qptr(*) + + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: work(*) + end subroutine zlaed7 +#else + module procedure stdlib_zlaed7 +#endif + end interface laed7 + + + + !> LAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + interface laed8 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claed8( k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda,q2, ldq2, w, & + indxp, indx, indxq, perm, givptr,givcol, givnum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,ldq,ldq2,n,qsiz + integer(ilp), intent(out) :: givptr,info,k,givcol(2,*),indx(*),indxp(*),perm(& + *) + real(sp), intent(inout) :: rho,d(*),z(*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(out) :: dlamda(*),givnum(2,*),w(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: q2(ldq2,*) + end subroutine claed8 +#else + module procedure stdlib_claed8 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed8( icompq, k, n, qsiz, d, q, ldq, indxq, rho,cutpnt, z, & + dlamda, q2, ldq2, w, perm, givptr,givcol, givnum, indxp, indx, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,icompq,ldq,ldq2,n,qsiz + integer(ilp), intent(out) :: givptr,info,k,givcol(2,*),indx(*),indxp(*),perm(& + *) + real(dp), intent(inout) :: rho,d(*),q(ldq,*),z(*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(out) :: dlamda(*),givnum(2,*),q2(ldq2,*),w(*) + end subroutine dlaed8 +#else + module procedure stdlib_dlaed8 +#endif +#:if WITH_QP + module procedure stdlib_qlaed8 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed8( icompq, k, n, qsiz, d, q, ldq, indxq, rho,cutpnt, z, & + dlamda, q2, ldq2, w, perm, givptr,givcol, givnum, indxp, indx, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,icompq,ldq,ldq2,n,qsiz + integer(ilp), intent(out) :: givptr,info,k,givcol(2,*),indx(*),indxp(*),perm(& + *) + real(sp), intent(inout) :: rho,d(*),q(ldq,*),z(*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(out) :: dlamda(*),givnum(2,*),q2(ldq2,*),w(*) + end subroutine slaed8 +#else + module procedure stdlib_slaed8 +#endif +#:if WITH_QP + module procedure stdlib_wlaed8 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaed8( k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda,q2, ldq2, w, & + indxp, indx, indxq, perm, givptr,givcol, givnum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: cutpnt,ldq,ldq2,n,qsiz + integer(ilp), intent(out) :: givptr,info,k,givcol(2,*),indx(*),indxp(*),perm(& + *) + real(dp), intent(inout) :: rho,d(*),z(*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(out) :: dlamda(*),givnum(2,*),w(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: q2(ldq2,*) + end subroutine zlaed8 +#else + module procedure stdlib_zlaed8 +#endif + end interface laed8 + + + + !> LAED9: finds the roots of the secular equation, as defined by the + !> values in D, Z, and RHO, between KSTART and KSTOP. It makes the + !> appropriate calls to DLAED4 and then stores the new matrix of + !> eigenvectors for use in calculating the next level of Z vectors. + interface laed9 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaed9( k, kstart, kstop, n, d, q, ldq, rho, dlamda, w,s, lds, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,kstart,kstop,ldq,lds,n + real(dp), intent(in) :: rho + real(dp), intent(out) :: d(*),q(ldq,*),s(lds,*) + real(dp), intent(inout) :: dlamda(*),w(*) + end subroutine dlaed9 +#else + module procedure stdlib_dlaed9 +#endif +#:if WITH_QP + module procedure stdlib_qlaed9 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaed9( k, kstart, kstop, n, d, q, ldq, rho, dlamda, w,s, lds, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,kstart,kstop,ldq,lds,n + real(sp), intent(in) :: rho + real(sp), intent(out) :: d(*),q(ldq,*),s(lds,*) + real(sp), intent(inout) :: dlamda(*),w(*) + end subroutine slaed9 +#else + module procedure stdlib_slaed9 +#endif + end interface laed9 + + + + !> LAEDA: computes the Z vector corresponding to the merge step in the + !> CURLVLth step of the merge process with TLVLS steps for the CURPBMth + !> problem. + interface laeda +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaeda( n, tlvls, curlvl, curpbm, prmptr, perm, givptr,givcol, & + givnum, q, qptr, z, ztemp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,n,tlvls,givcol(2,*),givptr(*),perm(& + *),prmptr(*),qptr(*) + integer(ilp), intent(out) :: info + real(dp), intent(in) :: givnum(2,*),q(*) + real(dp), intent(out) :: z(*),ztemp(*) + end subroutine dlaeda +#else + module procedure stdlib_dlaeda +#endif +#:if WITH_QP + module procedure stdlib_qlaeda +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaeda( n, tlvls, curlvl, curpbm, prmptr, perm, givptr,givcol, & + givnum, q, qptr, z, ztemp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: curlvl,curpbm,n,tlvls,givcol(2,*),givptr(*),perm(& + *),prmptr(*),qptr(*) + integer(ilp), intent(out) :: info + real(sp), intent(in) :: givnum(2,*),q(*) + real(sp), intent(out) :: z(*),ztemp(*) + end subroutine slaeda +#else + module procedure stdlib_slaeda +#endif + end interface laeda + + + + !> LAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue W of a complex upper Hessenberg + !> matrix H. + interface laein +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claein( rightv, noinit, n, h, ldh, w, v, b, ldb, rwork,eps3, & + smlnum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: noinit,rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldh,n + real(sp), intent(in) :: eps3,smlnum + complex(sp), intent(in) :: w,h(ldh,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: b(ldb,*) + complex(sp), intent(inout) :: v(*) + end subroutine claein +#else + module procedure stdlib_claein +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaein( rightv, noinit, n, h, ldh, wr, wi, vr, vi, b,ldb, work, & + eps3, smlnum, bignum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: noinit,rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldh,n + real(dp), intent(in) :: bignum,eps3,smlnum,wi,wr,h(ldh,*) + real(dp), intent(out) :: b(ldb,*),work(*) + real(dp), intent(inout) :: vi(*),vr(*) + end subroutine dlaein +#else + module procedure stdlib_dlaein +#endif +#:if WITH_QP + module procedure stdlib_qlaein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaein( rightv, noinit, n, h, ldh, wr, wi, vr, vi, b,ldb, work, & + eps3, smlnum, bignum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: noinit,rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldh,n + real(sp), intent(in) :: bignum,eps3,smlnum,wi,wr,h(ldh,*) + real(sp), intent(out) :: b(ldb,*),work(*) + real(sp), intent(inout) :: vi(*),vr(*) + end subroutine slaein +#else + module procedure stdlib_slaein +#endif +#:if WITH_QP + module procedure stdlib_wlaein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaein( rightv, noinit, n, h, ldh, w, v, b, ldb, rwork,eps3, & + smlnum, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: noinit,rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldh,n + real(dp), intent(in) :: eps3,smlnum + complex(dp), intent(in) :: w,h(ldh,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: b(ldb,*) + complex(dp), intent(inout) :: v(*) + end subroutine zlaein +#else + module procedure stdlib_zlaein +#endif + end interface laein + + + + !> LAESY: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> ( ( A, B );( B, C ) ) + !> provided the norm of the matrix of eigenvectors is larger than + !> some threshold value. + !> RT1 is the eigenvalue of larger absolute value, and RT2 of + !> smaller absolute value. If the eigenvectors are computed, then + !> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence + !> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] + !> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] + interface laesy +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claesy( a, b, c, rt1, rt2, evscal, cs1, sn1 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: a,b,c + complex(sp), intent(out) :: cs1,evscal,rt1,rt2,sn1 + end subroutine claesy +#else + module procedure stdlib_claesy +#endif +#:if WITH_QP + module procedure stdlib_wlaesy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaesy( a, b, c, rt1, rt2, evscal, cs1, sn1 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: a,b,c + complex(dp), intent(out) :: cs1,evscal,rt1,rt2,sn1 + end subroutine zlaesy +#else + module procedure stdlib_zlaesy +#endif + end interface laesy + + + + !> LAEXC: swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + !> an upper quasi-triangular matrix T by an orthogonal similarity + !> transformation. + !> T must be in Schur canonical form, that is, block upper triangular + !> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + !> has its diagonal elements equal and its off-diagonal elements of + !> opposite sign. + interface laexc +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlaexc( wantq, n, t, ldt, q, ldq, j1, n1, n2, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1,ldq,ldt,n,n1,n2 + real(dp), intent(inout) :: q(ldq,*),t(ldt,*) + real(dp), intent(out) :: work(*) + end subroutine dlaexc +#else + module procedure stdlib_dlaexc +#endif +#:if WITH_QP + module procedure stdlib_qlaexc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slaexc( wantq, n, t, ldt, q, ldq, j1, n1, n2, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1,ldq,ldt,n,n1,n2 + real(sp), intent(inout) :: q(ldq,*),t(ldt,*) + real(sp), intent(out) :: work(*) + end subroutine slaexc +#else + module procedure stdlib_slaexc +#endif + end interface laexc + + + + !> LAGTF: factorizes the matrix (T - lambda*I), where T is an n by n + !> tridiagonal matrix and lambda is a scalar, as + !> T - lambda*I = PLU, + !> where P is a permutation matrix, L is a unit lower tridiagonal matrix + !> with at most one non-zero sub-diagonal elements per column and U is + !> an upper triangular matrix with at most two non-zero super-diagonal + !> elements per column. + !> The factorization is obtained by Gaussian elimination with partial + !> pivoting and implicit row scaling. + !> The parameter LAMBDA is included in the routine so that LAGTF may + !> be used, in conjunction with DLAGTS, to obtain eigenvectors of T by + !> inverse iteration. + interface lagtf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlagtf( n, a, lambda, b, c, tol, d, in, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,in(*) + integer(ilp), intent(in) :: n + real(dp), intent(in) :: lambda,tol + real(dp), intent(inout) :: a(*),b(*),c(*) + real(dp), intent(out) :: d(*) + end subroutine dlagtf +#else + module procedure stdlib_dlagtf +#endif +#:if WITH_QP + module procedure stdlib_qlagtf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slagtf( n, a, lambda, b, c, tol, d, in, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,in(*) + integer(ilp), intent(in) :: n + real(sp), intent(in) :: lambda,tol + real(sp), intent(inout) :: a(*),b(*),c(*) + real(sp), intent(out) :: d(*) + end subroutine slagtf +#else + module procedure stdlib_slagtf +#endif + end interface lagtf + + + + !> LAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + interface lagtm +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(in) :: alpha,beta + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: d(*),dl(*),du(*),x(ldx,*) + end subroutine clagtm +#else + module procedure stdlib_clagtm +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(in) :: alpha,beta,d(*),dl(*),du(*),x(ldx,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dlagtm +#else + module procedure stdlib_dlagtm +#endif +#:if WITH_QP + module procedure stdlib_qlagtm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(in) :: alpha,beta,d(*),dl(*),du(*),x(ldx,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine slagtm +#else + module procedure stdlib_slagtm +#endif +#:if WITH_QP + module procedure stdlib_wlagtm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(in) :: alpha,beta + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: d(*),dl(*),du(*),x(ldx,*) + end subroutine zlagtm +#else + module procedure stdlib_zlagtm +#endif + end interface lagtm + + + + !> LAGTS: may be used to solve one of the systems of equations + !> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, + !> where T is an n by n tridiagonal matrix, for x, following the + !> factorization of (T - lambda*I) as + !> (T - lambda*I) = P*L*U , + !> by routine DLAGTF. The choice of equation to be solved is + !> controlled by the argument JOB, and in each case there is an option + !> to perturb zero or very small diagonal elements of U, this option + !> being intended for use in applications such as inverse iteration. + interface lagts +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlagts( job, n, a, b, c, d, in, y, tol, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: job,n,in(*) + real(dp), intent(inout) :: tol,y(*) + real(dp), intent(in) :: a(*),b(*),c(*),d(*) + end subroutine dlagts +#else + module procedure stdlib_dlagts +#endif +#:if WITH_QP + module procedure stdlib_qlagts +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slagts( job, n, a, b, c, d, in, y, tol, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: job,n,in(*) + real(sp), intent(inout) :: tol,y(*) + real(sp), intent(in) :: a(*),b(*),c(*),d(*) + end subroutine slagts +#else + module procedure stdlib_slagts +#endif + end interface lagts + + + + !> LAHEF: computes a partial factorization of a complex Hermitian + !> matrix A using the Bunch-Kaufman diagonal pivoting method. The + !> partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> LAHEF is an auxiliary routine called by CHETRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + interface lahef +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clahef( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + end subroutine clahef +#else + module procedure stdlib_clahef +#endif +#:if WITH_QP + module procedure stdlib_wlahef +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlahef( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + end subroutine zlahef +#else + module procedure stdlib_zlahef +#endif + end interface lahef + + + + !> LAHEF_AA: factorizes a panel of a complex hermitian matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + interface lahef_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clahef_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*),h(ldh,*) + complex(sp), intent(out) :: work(*) + end subroutine clahef_aa +#else + module procedure stdlib_clahef_aa +#endif +#:if WITH_QP + module procedure stdlib_wlahef_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlahef_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*),h(ldh,*) + complex(dp), intent(out) :: work(*) + end subroutine zlahef_aa +#else + module procedure stdlib_zlahef_aa +#endif + end interface lahef_aa + + + + !> LAHEF_RK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> LAHEF_RK is an auxiliary routine called by CHETRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + interface lahef_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clahef_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*),e(*) + end subroutine clahef_rk +#else + module procedure stdlib_clahef_rk +#endif +#:if WITH_QP + module procedure stdlib_wlahef_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlahef_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*),e(*) + end subroutine zlahef_rk +#else + module procedure stdlib_zlahef_rk +#endif + end interface lahef_rk + + + + !> LAHEF_ROOK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting + !> method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> LAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + interface lahef_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clahef_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + end subroutine clahef_rook +#else + module procedure stdlib_clahef_rook +#endif +#:if WITH_QP + module procedure stdlib_wlahef_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlahef_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + end subroutine zlahef_rook +#else + module procedure stdlib_zlahef_rook +#endif + end interface lahef_rook + + + + !> LAHQR: is an auxiliary routine called by CHSEQR to update the + !> eigenvalues and Schur decomposition already computed by CHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + interface lahqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(sp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(sp), intent(out) :: w(*) + end subroutine clahqr +#else + module procedure stdlib_clahqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, & + ldz, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(dp), intent(inout) :: h(ldh,*),z(ldz,*) + real(dp), intent(out) :: wi(*),wr(*) + end subroutine dlahqr +#else + module procedure stdlib_dlahqr +#endif +#:if WITH_QP + module procedure stdlib_qlahqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, & + ldz, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(sp), intent(inout) :: h(ldh,*),z(ldz,*) + real(sp), intent(out) :: wi(*),wr(*) + end subroutine slahqr +#else + module procedure stdlib_slahqr +#endif +#:if WITH_QP + module procedure stdlib_wlahqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(dp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(dp), intent(out) :: w(*) + end subroutine zlahqr +#else + module procedure stdlib_zlahqr +#endif + end interface lahqr + + + + !> LAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then LAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**H gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**H and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] + !> [ conjg(gamma) ] + !> where alpha = x**H*w. + interface laic1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claic1( job, j, x, sest, w, gamma, sestpr, s, c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: j,job + real(sp), intent(in) :: sest + real(sp), intent(out) :: sestpr + complex(sp), intent(out) :: c,s + complex(sp), intent(in) :: gamma,w(j),x(j) + end subroutine claic1 +#else + module procedure stdlib_claic1 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: j,job + real(dp), intent(out) :: c,s,sestpr + real(dp), intent(in) :: gamma,sest,w(j),x(j) + end subroutine dlaic1 +#else + module procedure stdlib_dlaic1 +#endif +#:if WITH_QP + module procedure stdlib_qlaic1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: j,job + real(sp), intent(out) :: c,s,sestpr + real(sp), intent(in) :: gamma,sest,w(j),x(j) + end subroutine slaic1 +#else + module procedure stdlib_slaic1 +#endif +#:if WITH_QP + module procedure stdlib_wlaic1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: j,job + real(dp), intent(in) :: sest + real(dp), intent(out) :: sestpr + complex(dp), intent(out) :: c,s + complex(dp), intent(in) :: gamma,w(j),x(j) + end subroutine zlaic1 +#else + module procedure stdlib_zlaic1 +#endif + end interface laic1 + + + + !> This routine is not for general use. It exists solely to avoid + !> over-optimization in DISNAN. + !> LAISNAN: checks for NaNs by comparing its two arguments for + !> inequality. NaN is the only floating-point value where NaN != NaN + !> returns .TRUE. To check for NaNs, pass the same variable as both + !> arguments. + !> A compiler must assume that the two arguments are + !> not the same variable, and the test will not be optimized away. + !> Interprocedural or whole-program optimization may delete this + !> test. The ISNAN functions will be replaced by the correct + !> Fortran 03 intrinsic once the intrinsic is widely available. + interface laisnan +#ifdef STDLIB_EXTERNAL_LAPACK + pure logical(lk) function dlaisnan( din1, din2 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: din1,din2 + end function dlaisnan +#else + module procedure stdlib_dlaisnan +#endif +#:if WITH_QP + module procedure stdlib_qlaisnan +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure logical(lk) function slaisnan( sin1, sin2 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: sin1,sin2 + end function slaisnan +#else + module procedure stdlib_slaisnan +#endif + end interface laisnan + + + + !> LALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + interface lals0 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: givptr,icompq,k,ldb,ldbx,ldgcol,ldgnum,nl,nr,nrhs,& + sqre,givcol(ldgcol,*),perm(*) + integer(ilp), intent(out) :: info + real(sp), intent(in) :: c,s,difl(*),difr(ldgnum,*),givnum(ldgnum,*),poles(& + ldgnum,*),z(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: bx(ldbx,*) + end subroutine clals0 +#else + module procedure stdlib_clals0 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: givptr,icompq,k,ldb,ldbx,ldgcol,ldgnum,nl,nr,nrhs,& + sqre,givcol(ldgcol,*),perm(*) + integer(ilp), intent(out) :: info + real(dp), intent(in) :: c,s,difl(*),difr(ldgnum,*),givnum(ldgnum,*),poles(& + ldgnum,*),z(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: bx(ldbx,*),work(*) + end subroutine dlals0 +#else + module procedure stdlib_dlals0 +#endif +#:if WITH_QP + module procedure stdlib_qlals0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: givptr,icompq,k,ldb,ldbx,ldgcol,ldgnum,nl,nr,nrhs,& + sqre,givcol(ldgcol,*),perm(*) + integer(ilp), intent(out) :: info + real(sp), intent(in) :: c,s,difl(*),difr(ldgnum,*),givnum(ldgnum,*),poles(& + ldgnum,*),z(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: bx(ldbx,*),work(*) + end subroutine slals0 +#else + module procedure stdlib_slals0 +#endif +#:if WITH_QP + module procedure stdlib_wlals0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: givptr,icompq,k,ldb,ldbx,ldgcol,ldgnum,nl,nr,nrhs,& + sqre,givcol(ldgcol,*),perm(*) + integer(ilp), intent(out) :: info + real(dp), intent(in) :: c,s,difl(*),difr(ldgnum,*),givnum(ldgnum,*),poles(& + ldgnum,*),z(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: bx(ldbx,*) + end subroutine zlals0 +#else + module procedure stdlib_zlals0 +#endif + end interface lals0 + + + + !> LALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, LALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, LALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by LALSA. + interface lalsa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, & + difl, difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, rwork,iwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldb,ldbx,ldgcol,ldu,n,nrhs,smlsiz,givcol(& + ldgcol,*),givptr(*),k(*),perm(ldgcol,*) + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),z(ldu,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: bx(ldbx,*) + end subroutine clalsa +#else + module procedure stdlib_clalsa +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, & + difl, difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldb,ldbx,ldgcol,ldu,n,nrhs,smlsiz,givcol(& + ldgcol,*),givptr(*),k(*),perm(ldgcol,*) + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: bx(ldbx,*),work(*) + real(dp), intent(in) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),z(ldu,*) + end subroutine dlalsa +#else + module procedure stdlib_dlalsa +#endif +#:if WITH_QP + module procedure stdlib_qlalsa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, & + difl, difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldb,ldbx,ldgcol,ldu,n,nrhs,smlsiz,givcol(& + ldgcol,*),givptr(*),k(*),perm(ldgcol,*) + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: bx(ldbx,*),work(*) + real(sp), intent(in) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),z(ldu,*) + end subroutine slalsa +#else + module procedure stdlib_slalsa +#endif +#:if WITH_QP + module procedure stdlib_wlalsa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, & + difl, difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, rwork,iwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldb,ldbx,ldgcol,ldu,n,nrhs,smlsiz,givcol(& + ldgcol,*),givptr(*),k(*),perm(ldgcol,*) + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),z(ldu,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: bx(ldbx,*) + end subroutine zlalsa +#else + module procedure stdlib_zlalsa +#endif + end interface lalsa + + + + !> LALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface lalsd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, & + rwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: ldb,n,nrhs,smlsiz + real(sp), intent(in) :: rcond + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine clalsd +#else + module procedure stdlib_clalsd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: ldb,n,nrhs,smlsiz + real(dp), intent(in) :: rcond + real(dp), intent(inout) :: b(ldb,*),d(*),e(*) + real(dp), intent(out) :: work(*) + end subroutine dlalsd +#else + module procedure stdlib_dlalsd +#endif +#:if WITH_QP + module procedure stdlib_qlalsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: ldb,n,nrhs,smlsiz + real(sp), intent(in) :: rcond + real(sp), intent(inout) :: b(ldb,*),d(*),e(*) + real(sp), intent(out) :: work(*) + end subroutine slalsd +#else + module procedure stdlib_slalsd +#endif +#:if WITH_QP + module procedure stdlib_wlalsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, & + rwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,rank,iwork(*) + integer(ilp), intent(in) :: ldb,n,nrhs,smlsiz + real(dp), intent(in) :: rcond + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zlalsd +#else + module procedure stdlib_zlalsd +#endif + end interface lalsd + + + + !> LAMRG: will create a permutation list which will merge the elements + !> of A (which is composed of two independently sorted sets) into a + !> single set which is sorted in ascending order. + interface lamrg +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlamrg( n1, n2, a, dtrd1, dtrd2, index ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: dtrd1,dtrd2,n1,n2 + integer(ilp), intent(out) :: index(*) + real(dp), intent(in) :: a(*) + end subroutine dlamrg +#else + module procedure stdlib_dlamrg +#endif +#:if WITH_QP + module procedure stdlib_qlamrg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slamrg( n1, n2, a, strd1, strd2, index ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n1,n2,strd1,strd2 + integer(ilp), intent(out) :: index(*) + real(sp), intent(in) :: a(*) + end subroutine slamrg +#else + module procedure stdlib_slamrg +#endif + end interface lamrg + + + + !> LAMSWLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (CLASWLQ) + interface lamswlq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + complex(sp), intent(in) :: a(lda,*),t(ldt,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: c(ldc,*) + end subroutine clamswlq +#else + module procedure stdlib_clamswlq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + real(dp), intent(in) :: a(lda,*),t(ldt,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: c(ldc,*) + end subroutine dlamswlq +#else + module procedure stdlib_dlamswlq +#endif +#:if WITH_QP + module procedure stdlib_qlamswlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + real(sp), intent(in) :: a(lda,*),t(ldt,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: c(ldc,*) + end subroutine slamswlq +#else + module procedure stdlib_slamswlq +#endif +#:if WITH_QP + module procedure stdlib_wlamswlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + complex(dp), intent(in) :: a(lda,*),t(ldt,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zlamswlq +#else + module procedure stdlib_zlamswlq +#endif + end interface lamswlq + + + + !> LAMTSQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (CLATSQR) + interface lamtsqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + complex(sp), intent(in) :: a(lda,*),t(ldt,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: c(ldc,*) + end subroutine clamtsqr +#else + module procedure stdlib_clamtsqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + real(dp), intent(in) :: a(lda,*),t(ldt,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: c(ldc,*) + end subroutine dlamtsqr +#else + module procedure stdlib_dlamtsqr +#endif +#:if WITH_QP + module procedure stdlib_qlamtsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + real(sp), intent(in) :: a(lda,*),t(ldt,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: c(ldc,*) + end subroutine slamtsqr +#else + module procedure stdlib_slamtsqr +#endif +#:if WITH_QP + module procedure stdlib_wlamtsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,k,mb,nb,ldt,lwork,ldc + complex(dp), intent(in) :: a(lda,*),t(ldt,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: c(ldc,*) + end subroutine zlamtsqr +#else + module procedure stdlib_zlamtsqr +#endif + end interface lamtsqr + + + + !> LANEG: computes the Sturm count, the number of negative pivots + !> encountered while factoring tridiagonal T - sigma I = L D L^T. + !> This implementation works directly on the factors without forming + !> the tridiagonal matrix T. The Sturm count is also the number of + !> eigenvalues of T less than sigma. + !> This routine is called from DLARRB. + !> The current routine does not use the PIVMIN parameter but rather + !> requires IEEE-754 propagation of Infinities and NaNs. This + !> routine also has no input range restrictions but does require + !> default exception handling such that x/0 produces Inf when x is + !> non-zero, and Inf/Inf produces NaN. For more information, see: + !> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in + !> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on + !> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 + !> (Tech report version in LAWN 172 with the same title.) + interface laneg +#ifdef STDLIB_EXTERNAL_LAPACK + pure integer(ilp) function dlaneg( n, d, lld, sigma, pivmin, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,r + real(dp), intent(in) :: pivmin,sigma,d(*),lld(*) + end function dlaneg +#else + module procedure stdlib_dlaneg +#endif +#:if WITH_QP + module procedure stdlib_qlaneg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure integer(ilp) function slaneg( n, d, lld, sigma, pivmin, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n,r + real(sp), intent(in) :: pivmin,sigma,d(*),lld(*) + end function slaneg +#else + module procedure stdlib_slaneg +#endif + end interface laneg + + + + !> LANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + interface langb +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clangb( norm, n, kl, ku, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: kl,ku,ldab,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + end function clangb +#else + module procedure stdlib_clangb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlangb( norm, n, kl, ku, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: kl,ku,ldab,n + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + end function dlangb +#else + module procedure stdlib_dlangb +#endif +#:if WITH_QP + module procedure stdlib_qlangb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slangb( norm, n, kl, ku, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: kl,ku,ldab,n + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + end function slangb +#else + module procedure stdlib_slangb +#endif +#:if WITH_QP + module procedure stdlib_wlangb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlangb( norm, n, kl, ku, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: kl,ku,ldab,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + end function zlangb +#else + module procedure stdlib_zlangb +#endif + end interface langb + + + + !> LANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex matrix A. + interface lange +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clange( norm, m, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + end function clange +#else + module procedure stdlib_clange +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlange( norm, m, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + end function dlange +#else + module procedure stdlib_dlange +#endif +#:if WITH_QP + module procedure stdlib_qlange +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slange( norm, m, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + end function slange +#else + module procedure stdlib_slange +#endif +#:if WITH_QP + module procedure stdlib_wlange +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlange( norm, m, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + end function zlange +#else + module procedure stdlib_zlange +#endif + end interface lange + + + + !> LANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex tridiagonal matrix A. + interface langt +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function clangt( norm, n, dl, d, du ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + complex(sp), intent(in) :: d(*),dl(*),du(*) + end function clangt +#else + module procedure stdlib_clangt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function dlangt( norm, n, dl, d, du ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(dp), intent(in) :: d(*),dl(*),du(*) + end function dlangt +#else + module procedure stdlib_dlangt +#endif +#:if WITH_QP + module procedure stdlib_qlangt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function slangt( norm, n, dl, d, du ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(sp), intent(in) :: d(*),dl(*),du(*) + end function slangt +#else + module procedure stdlib_slangt +#endif +#:if WITH_QP + module procedure stdlib_wlangt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function zlangt( norm, n, dl, d, du ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + complex(dp), intent(in) :: d(*),dl(*),du(*) + end function zlangt +#else + module procedure stdlib_zlangt +#endif + end interface langt + + + + !> LANHB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n hermitian band matrix A, with k super-diagonals. + interface lanhb +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clanhb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + end function clanhb +#else + module procedure stdlib_clanhb +#endif +#:if WITH_QP + module procedure stdlib_wlanhb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlanhb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + end function zlanhb +#else + module procedure stdlib_zlanhb +#endif + end interface lanhb + + + + !> LANHE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A. + interface lanhe +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clanhe( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + end function clanhe +#else + module procedure stdlib_clanhe +#endif +#:if WITH_QP + module procedure stdlib_wlanhe +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlanhe( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + end function zlanhe +#else + module procedure stdlib_zlanhe +#endif + end interface lanhe + + + + !> LANHF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian matrix A in RFP format. + interface lanhf +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clanhf( norm, transr, uplo, n, a, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,transr,uplo + integer(ilp), intent(in) :: n + real(sp), intent(out) :: work(0:*) + complex(sp), intent(in) :: a(0:*) + end function clanhf +#else + module procedure stdlib_clanhf +#endif +#:if WITH_QP + module procedure stdlib_wlanhf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlanhf( norm, transr, uplo, n, a, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,transr,uplo + integer(ilp), intent(in) :: n + real(dp), intent(out) :: work(0:*) + complex(dp), intent(in) :: a(0:*) + end function zlanhf +#else + module procedure stdlib_zlanhf +#endif + end interface lanhf + + + + !> LANHP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A, supplied in packed form. + interface lanhp +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clanhp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + end function clanhp +#else + module procedure stdlib_clanhp +#endif +#:if WITH_QP + module procedure stdlib_wlanhp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlanhp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + end function zlanhp +#else + module procedure stdlib_zlanhp +#endif + end interface lanhp + + + + !> LANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + interface lanhs +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clanhs( norm, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + end function clanhs +#else + module procedure stdlib_clanhs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlanhs( norm, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + end function dlanhs +#else + module procedure stdlib_dlanhs +#endif +#:if WITH_QP + module procedure stdlib_qlanhs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slanhs( norm, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + end function slanhs +#else + module procedure stdlib_slanhs +#endif +#:if WITH_QP + module procedure stdlib_wlanhs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlanhs( norm, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + end function zlanhs +#else + module procedure stdlib_zlanhs +#endif + end interface lanhs + + + + !> LANHT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian tridiagonal matrix A. + interface lanht +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function clanht( norm, n, d, e ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(sp), intent(in) :: d(*) + complex(sp), intent(in) :: e(*) + end function clanht +#else + module procedure stdlib_clanht +#endif +#:if WITH_QP + module procedure stdlib_wlanht +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function zlanht( norm, n, d, e ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(dp), intent(in) :: d(*) + complex(dp), intent(in) :: e(*) + end function zlanht +#else + module procedure stdlib_zlanht +#endif + end interface lanht + + + + !> LANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + interface lansb +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clansb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + end function clansb +#else + module procedure stdlib_clansb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlansb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + end function dlansb +#else + module procedure stdlib_dlansb +#endif +#:if WITH_QP + module procedure stdlib_qlansb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slansb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + end function slansb +#else + module procedure stdlib_slansb +#endif +#:if WITH_QP + module procedure stdlib_wlansb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlansb( norm, uplo, n, k, ab, ldab,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + end function zlansb +#else + module procedure stdlib_zlansb +#endif + end interface lansb + + + + !> LANSF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A in RFP format. + interface lansf +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlansf( norm, transr, uplo, n, a, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,transr,uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: a(0:*) + real(dp), intent(out) :: work(0:*) + end function dlansf +#else + module procedure stdlib_dlansf +#endif +#:if WITH_QP + module procedure stdlib_qlansf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slansf( norm, transr, uplo, n, a, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,transr,uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: a(0:*) + real(sp), intent(out) :: work(0:*) + end function slansf +#else + module procedure stdlib_slansf +#endif + end interface lansf + + + + !> LANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A, supplied in packed form. + interface lansp +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clansp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + end function clansp +#else + module procedure stdlib_clansp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlansp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: work(*) + end function dlansp +#else + module procedure stdlib_dlansp +#endif +#:if WITH_QP + module procedure stdlib_qlansp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slansp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: work(*) + end function slansp +#else + module procedure stdlib_slansp +#endif +#:if WITH_QP + module procedure stdlib_wlansp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlansp( norm, uplo, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + end function zlansp +#else + module procedure stdlib_zlansp +#endif + end interface lansp + + + + !> LANST: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric tridiagonal matrix A. + interface lanst +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(dp) function dlanst( norm, n, d, e ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(dp), intent(in) :: d(*),e(*) + end function dlanst +#else + module procedure stdlib_dlanst +#endif +#:if WITH_QP + module procedure stdlib_qlanst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure real(sp) function slanst( norm, n, d, e ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm + integer(ilp), intent(in) :: n + real(sp), intent(in) :: d(*),e(*) + end function slanst +#else + module procedure stdlib_slanst +#endif + end interface lanst + + + + !> LANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A. + interface lansy +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clansy( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + end function clansy +#else + module procedure stdlib_clansy +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlansy( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + end function dlansy +#else + module procedure stdlib_dlansy +#endif +#:if WITH_QP + module procedure stdlib_qlansy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slansy( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + end function slansy +#else + module procedure stdlib_slansy +#endif +#:if WITH_QP + module procedure stdlib_wlansy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlansy( norm, uplo, n, a, lda, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: norm,uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + end function zlansy +#else + module procedure stdlib_zlansy +#endif + end interface lansy + + + + !> LANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + interface lantb +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clantb( norm, uplo, diag, n, k, ab,ldab, work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + end function clantb +#else + module procedure stdlib_clantb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlantb( norm, uplo, diag, n, k, ab,ldab, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + end function dlantb +#else + module procedure stdlib_dlantb +#endif +#:if WITH_QP + module procedure stdlib_qlantb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slantb( norm, uplo, diag, n, k, ab,ldab, work ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + end function slantb +#else + module procedure stdlib_slantb +#endif +#:if WITH_QP + module procedure stdlib_wlantb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlantb( norm, uplo, diag, n, k, ab,ldab, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: k,ldab,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + end function zlantb +#else + module procedure stdlib_zlantb +#endif + end interface lantb + + + + !> LANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + interface lantp +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clantp( norm, uplo, diag, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + end function clantp +#else + module procedure stdlib_clantp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlantp( norm, uplo, diag, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: work(*) + end function dlantp +#else + module procedure stdlib_dlantp +#endif +#:if WITH_QP + module procedure stdlib_qlantp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slantp( norm, uplo, diag, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: work(*) + end function slantp +#else + module procedure stdlib_slantp +#endif +#:if WITH_QP + module procedure stdlib_wlantp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlantp( norm, uplo, diag, n, ap, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + end function zlantp +#else + module procedure stdlib_zlantp +#endif + end interface lantp + + + + !> LANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + interface lantr +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function clantr( norm, uplo, diag, m, n, a, lda,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + end function clantr +#else + module procedure stdlib_clantr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function dlantr( norm, uplo, diag, m, n, a, lda,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + end function dlantr +#else + module procedure stdlib_dlantr +#endif +#:if WITH_QP + module procedure stdlib_qlantr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(sp) function slantr( norm, uplo, diag, m, n, a, lda,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + end function slantr +#else + module procedure stdlib_slantr +#endif +#:if WITH_QP + module procedure stdlib_wlantr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + real(dp) function zlantr( norm, uplo, diag, m, n, a, lda,work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + end function zlantr +#else + module procedure stdlib_zlantr +#endif + end interface lantr + + + + !> LAORHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine LAORHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + interface laorhr_col_getrfnp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaorhr_col_getrfnp( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*) + end subroutine dlaorhr_col_getrfnp +#else + module procedure stdlib_dlaorhr_col_getrfnp +#endif +#:if WITH_QP + module procedure stdlib_qlaorhr_col_getrfnp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaorhr_col_getrfnp( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*) + end subroutine slaorhr_col_getrfnp +#else + module procedure stdlib_slaorhr_col_getrfnp +#endif + end interface laorhr_col_getrfnp + + + + !> LAORHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> LAORHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine DLAORHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, LAORHR_COL_GETRFNP2 + !> is self-sufficient and can be used without DLAORHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + interface laorhr_col_getrfnp2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine dlaorhr_col_getrfnp2( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*) + end subroutine dlaorhr_col_getrfnp2 +#else + module procedure stdlib_dlaorhr_col_getrfnp2 +#endif +#:if WITH_QP + module procedure stdlib_qlaorhr_col_getrfnp2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine slaorhr_col_getrfnp2( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*) + end subroutine slaorhr_col_getrfnp2 +#else + module procedure stdlib_slaorhr_col_getrfnp2 +#endif + end interface laorhr_col_getrfnp2 + + + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + interface lapll +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clapll( n, x, incx, y, incy, ssmin ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(out) :: ssmin + complex(sp), intent(inout) :: x(*),y(*) + end subroutine clapll +#else + module procedure stdlib_clapll +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlapll( n, x, incx, y, incy, ssmin ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(out) :: ssmin + real(dp), intent(inout) :: x(*),y(*) + end subroutine dlapll +#else + module procedure stdlib_dlapll +#endif +#:if WITH_QP + module procedure stdlib_qlapll +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slapll( n, x, incx, y, incy, ssmin ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(out) :: ssmin + real(sp), intent(inout) :: x(*),y(*) + end subroutine slapll +#else + module procedure stdlib_slapll +#endif +#:if WITH_QP + module procedure stdlib_wlapll +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlapll( n, x, incx, y, incy, ssmin ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(out) :: ssmin + complex(dp), intent(inout) :: x(*),y(*) + end subroutine zlapll +#else + module procedure stdlib_zlapll +#endif + end interface lapll + + + + !> LAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + interface lapmr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clapmr( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine clapmr +#else + module procedure stdlib_clapmr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlapmr( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dlapmr +#else + module procedure stdlib_dlapmr +#endif +#:if WITH_QP + module procedure stdlib_qlapmr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slapmr( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine slapmr +#else + module procedure stdlib_slapmr +#endif +#:if WITH_QP + module procedure stdlib_wlapmr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlapmr( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zlapmr +#else + module procedure stdlib_zlapmr +#endif + end interface lapmr + + + + !> LAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + interface lapmt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clapmt( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine clapmt +#else + module procedure stdlib_clapmt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlapmt( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dlapmt +#else + module procedure stdlib_dlapmt +#endif +#:if WITH_QP + module procedure stdlib_qlapmt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slapmt( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine slapmt +#else + module procedure stdlib_slapmt +#endif +#:if WITH_QP + module procedure stdlib_wlapmt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlapmt( forwrd, m, n, x, ldx, k ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx,m,n + integer(ilp), intent(inout) :: k(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zlapmt +#else + module procedure stdlib_zlapmt +#endif + end interface lapmt + + + + !> LAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + interface laqgb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine claqgb +#else + module procedure stdlib_claqgb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dlaqgb +#else + module procedure stdlib_dlaqgb +#endif +#:if WITH_QP + module procedure stdlib_qlaqgb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(sp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(inout) :: ab(ldab,*) + end subroutine slaqgb +#else + module procedure stdlib_slaqgb +#endif +#:if WITH_QP + module procedure stdlib_wlaqgb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: kl,ku,ldab,m,n + real(dp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zlaqgb +#else + module procedure stdlib_zlaqgb +#endif + end interface laqgb + + + + !> LAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + interface laqge +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine claqge +#else + module procedure stdlib_claqge +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + real(dp), intent(inout) :: a(lda,*) + end subroutine dlaqge +#else + module procedure stdlib_dlaqge +#endif +#:if WITH_QP + module procedure stdlib_qlaqge +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + real(sp), intent(inout) :: a(lda,*) + end subroutine slaqge +#else + module procedure stdlib_slaqge +#endif +#:if WITH_QP + module procedure stdlib_wlaqge +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: amax,colcnd,rowcnd,c(*),r(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlaqge +#else + module procedure stdlib_zlaqge +#endif + end interface laqge + + + + !> LAQHB: equilibrates an Hermitian band matrix A using the scaling + !> factors in the vector S. + interface laqhb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqhb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(in) :: amax,scond + real(sp), intent(out) :: s(*) + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine claqhb +#else + module procedure stdlib_claqhb +#endif +#:if WITH_QP + module procedure stdlib_wlaqhb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqhb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(in) :: amax,scond + real(dp), intent(out) :: s(*) + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zlaqhb +#else + module procedure stdlib_zlaqhb +#endif + end interface laqhb + + + + !> LAQHE: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + interface laqhe +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqhe( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: amax,scond,s(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine claqhe +#else + module procedure stdlib_claqhe +#endif +#:if WITH_QP + module procedure stdlib_wlaqhe +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqhe( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: amax,scond,s(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlaqhe +#else + module procedure stdlib_zlaqhe +#endif + end interface laqhe + + + + !> LAQHP: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + interface laqhp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqhp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax,scond,s(*) + complex(sp), intent(inout) :: ap(*) + end subroutine claqhp +#else + module procedure stdlib_claqhp +#endif +#:if WITH_QP + module procedure stdlib_wlaqhp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqhp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax,scond,s(*) + complex(dp), intent(inout) :: ap(*) + end subroutine zlaqhp +#else + module procedure stdlib_zlaqhp +#endif + end interface laqhp + + + + !> LAQPS: computes a step of QR factorization with column pivoting + !> of a complex M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + interface laqps +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda,ldf,m,n,nb,offset + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: vn1(*),vn2(*) + complex(sp), intent(inout) :: a(lda,*),auxv(*),f(ldf,*) + complex(sp), intent(out) :: tau(*) + end subroutine claqps +#else + module procedure stdlib_claqps +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda,ldf,m,n,nb,offset + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*),auxv(*),f(ldf,*),vn1(*),vn2(*) + real(dp), intent(out) :: tau(*) + end subroutine dlaqps +#else + module procedure stdlib_dlaqps +#endif +#:if WITH_QP + module procedure stdlib_qlaqps +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda,ldf,m,n,nb,offset + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*),auxv(*),f(ldf,*),vn1(*),vn2(*) + real(sp), intent(out) :: tau(*) + end subroutine slaqps +#else + module procedure stdlib_slaqps +#endif +#:if WITH_QP + module procedure stdlib_wlaqps +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda,ldf,m,n,nb,offset + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: vn1(*),vn2(*) + complex(dp), intent(inout) :: a(lda,*),auxv(*),f(ldf,*) + complex(dp), intent(out) :: tau(*) + end subroutine zlaqps +#else + module procedure stdlib_zlaqps +#endif + end interface laqps + + + + !> LAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + interface laqr0 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(sp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(sp), intent(out) :: w(*),work(*) + end subroutine claqr0 +#else + module procedure stdlib_claqr0 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(dp), intent(inout) :: h(ldh,*),z(ldz,*) + real(dp), intent(out) :: wi(*),work(*),wr(*) + end subroutine dlaqr0 +#else + module procedure stdlib_dlaqr0 +#endif +#:if WITH_QP + module procedure stdlib_qlaqr0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(sp), intent(inout) :: h(ldh,*),z(ldz,*) + real(sp), intent(out) :: wi(*),work(*),wr(*) + end subroutine slaqr0 +#else + module procedure stdlib_slaqr0 +#endif +#:if WITH_QP + module procedure stdlib_wlaqr0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(dp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(dp), intent(out) :: w(*),work(*) + end subroutine zlaqr0 +#else + module procedure stdlib_zlaqr0 +#endif + end interface laqr0 + + + + !> Given a 2-by-2 or 3-by-3 matrix H, LAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - s1*I)*(H - s2*I) + !> scaling to avoid overflows and most underflows. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + interface laqr1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqr1( n, h, ldh, s1, s2, v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(sp), intent(in) :: s1,s2,h(ldh,*) + integer(ilp), intent(in) :: ldh,n + complex(sp), intent(out) :: v(*) + end subroutine claqr1 +#else + module procedure stdlib_claqr1 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: si1,si2,sr1,sr2,h(ldh,*) + integer(ilp), intent(in) :: ldh,n + real(dp), intent(out) :: v(*) + end subroutine dlaqr1 +#else + module procedure stdlib_dlaqr1 +#endif +#:if WITH_QP + module procedure stdlib_qlaqr1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: si1,si2,sr1,sr2,h(ldh,*) + integer(ilp), intent(in) :: ldh,n + real(sp), intent(out) :: v(*) + end subroutine slaqr1 +#else + module procedure stdlib_slaqr1 +#endif +#:if WITH_QP + module procedure stdlib_wlaqr1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqr1( n, h, ldh, s1, s2, v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + complex(dp), intent(in) :: s1,s2,h(ldh,*) + integer(ilp), intent(in) :: ldh,n + complex(dp), intent(out) :: v(*) + end subroutine zlaqr1 +#else + module procedure stdlib_zlaqr1 +#endif + end interface laqr1 + + + + !> LAQR4: implements one level of recursion for CLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by CLAQR0 and, for large enough + !> deflation window size, it may be called by CLAQR3. This + !> subroutine is identical to CLAQR0 except that it calls CLAQR2 + !> instead of CLAQR3. + !> LAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + interface laqr4 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(sp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(sp), intent(out) :: w(*),work(*) + end subroutine claqr4 +#else + module procedure stdlib_claqr4 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(dp), intent(inout) :: h(ldh,*),z(ldz,*) + real(dp), intent(out) :: wi(*),work(*),wr(*) + end subroutine dlaqr4 +#else + module procedure stdlib_dlaqr4 +#endif +#:if WITH_QP + module procedure stdlib_qlaqr4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + real(sp), intent(inout) :: h(ldh,*),z(ldz,*) + real(sp), intent(out) :: wi(*),work(*),wr(*) + end subroutine slaqr4 +#else + module procedure stdlib_slaqr4 +#endif +#:if WITH_QP + module procedure stdlib_wlaqr4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, & + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ihiz,ilo,iloz,ldh,ldz,lwork,n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt,wantz + complex(dp), intent(inout) :: h(ldh,*),z(ldz,*) + complex(dp), intent(out) :: w(*),work(*) + end subroutine zlaqr4 +#else + module procedure stdlib_zlaqr4 +#endif + end interface laqr4 + + + + !> LAQR5: called by CLAQR0 performs a + !> single small-bulge multi-shift QR sweep. + interface laqr5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, & + iloz, ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihiz,iloz,kacc22,kbot,ktop,ldh,ldu,ldv,ldwh,ldwv,& + ldz,n,nh,nshfts,nv + logical(lk), intent(in) :: wantt,wantz + complex(sp), intent(inout) :: h(ldh,*),s(*),z(ldz,*) + complex(sp), intent(out) :: u(ldu,*),v(ldv,*),wh(ldwh,*),wv(ldwv,*) + end subroutine claqr5 +#else + module procedure stdlib_claqr5 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh,& + iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihiz,iloz,kacc22,kbot,ktop,ldh,ldu,ldv,ldwh,ldwv,& + ldz,n,nh,nshfts,nv + logical(lk), intent(in) :: wantt,wantz + real(dp), intent(inout) :: h(ldh,*),si(*),sr(*),z(ldz,*) + real(dp), intent(out) :: u(ldu,*),v(ldv,*),wh(ldwh,*),wv(ldwv,*) + end subroutine dlaqr5 +#else + module procedure stdlib_dlaqr5 +#endif +#:if WITH_QP + module procedure stdlib_qlaqr5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh,& + iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihiz,iloz,kacc22,kbot,ktop,ldh,ldu,ldv,ldwh,ldwv,& + ldz,n,nh,nshfts,nv + logical(lk), intent(in) :: wantt,wantz + real(sp), intent(inout) :: h(ldh,*),si(*),sr(*),z(ldz,*) + real(sp), intent(out) :: u(ldu,*),v(ldv,*),wh(ldwh,*),wv(ldwv,*) + end subroutine slaqr5 +#else + module procedure stdlib_slaqr5 +#endif +#:if WITH_QP + module procedure stdlib_wlaqr5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, & + iloz, ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihiz,iloz,kacc22,kbot,ktop,ldh,ldu,ldv,ldwh,ldwv,& + ldz,n,nh,nshfts,nv + logical(lk), intent(in) :: wantt,wantz + complex(dp), intent(inout) :: h(ldh,*),s(*),z(ldz,*) + complex(dp), intent(out) :: u(ldu,*),v(ldv,*),wh(ldwh,*),wv(ldwv,*) + end subroutine zlaqr5 +#else + module procedure stdlib_zlaqr5 +#endif + end interface laqr5 + + + + !> LAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + interface laqsb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(in) :: amax,scond,s(*) + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine claqsb +#else + module procedure stdlib_claqsb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(in) :: amax,scond,s(*) + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dlaqsb +#else + module procedure stdlib_dlaqsb +#endif +#:if WITH_QP + module procedure stdlib_qlaqsb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(in) :: amax,scond,s(*) + real(sp), intent(inout) :: ab(ldab,*) + end subroutine slaqsb +#else + module procedure stdlib_slaqsb +#endif +#:if WITH_QP + module procedure stdlib_wlaqsb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(in) :: amax,scond,s(*) + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zlaqsb +#else + module procedure stdlib_zlaqsb +#endif + end interface laqsb + + + + !> LAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + interface laqsp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqsp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax,scond,s(*) + complex(sp), intent(inout) :: ap(*) + end subroutine claqsp +#else + module procedure stdlib_claqsp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqsp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax,scond,s(*) + real(dp), intent(inout) :: ap(*) + end subroutine dlaqsp +#else + module procedure stdlib_dlaqsp +#endif +#:if WITH_QP + module procedure stdlib_qlaqsp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqsp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax,scond,s(*) + real(sp), intent(inout) :: ap(*) + end subroutine slaqsp +#else + module procedure stdlib_slaqsp +#endif +#:if WITH_QP + module procedure stdlib_wlaqsp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqsp( uplo, n, ap, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax,scond,s(*) + complex(dp), intent(inout) :: ap(*) + end subroutine zlaqsp +#else + module procedure stdlib_zlaqsp +#endif + end interface laqsp + + + + !> LAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + interface laqsy +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqsy( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: amax,scond,s(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine claqsy +#else + module procedure stdlib_claqsy +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: amax,scond,s(*) + real(dp), intent(inout) :: a(lda,*) + end subroutine dlaqsy +#else + module procedure stdlib_dlaqsy +#endif +#:if WITH_QP + module procedure stdlib_qlaqsy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqsy( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: amax,scond,s(*) + real(sp), intent(inout) :: a(lda,*) + end subroutine slaqsy +#else + module procedure stdlib_slaqsy +#endif +#:if WITH_QP + module procedure stdlib_wlaqsy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: amax,scond,s(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlaqsy +#else + module procedure stdlib_zlaqsy +#endif + end interface laqsy + + + + !> LAQTR: solves the real quasi-triangular system + !> op(T)*p = scale*c, if LREAL = .TRUE. + !> or the complex quasi-triangular systems + !> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. + !> in real arithmetic, where T is upper quasi-triangular. + !> If LREAL = .FALSE., then the first diagonal block of T must be + !> 1 by 1, B is the specially structured matrix + !> B = [ b(1) b(2) ... b(n) ] + !> [ w ] + !> [ w ] + !> [ . ] + !> [ w ] + !> op(A) = A or A**T, A**T denotes the transpose of + !> matrix A. + !> On input, X = [ c ]. On output, X = [ p ]. + !> [ d ] [ q ] + !> This subroutine is designed for the condition number estimation + !> in routine DTRSNA. + interface laqtr +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: lreal,ltran + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldt,n + real(dp), intent(out) :: scale,work(*) + real(dp), intent(in) :: w,b(*),t(ldt,*) + real(dp), intent(inout) :: x(*) + end subroutine dlaqtr +#else + module procedure stdlib_dlaqtr +#endif +#:if WITH_QP + module procedure stdlib_qlaqtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: lreal,ltran + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldt,n + real(sp), intent(out) :: scale,work(*) + real(sp), intent(in) :: w,b(*),t(ldt,*) + real(sp), intent(inout) :: x(*) + end subroutine slaqtr +#else + module procedure stdlib_slaqtr +#endif + end interface laqtr + + + + !> LAQZ0: computes the eigenvalues of a matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by CGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices, P and S are an upper triangular + !> matrices. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from CGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the unitary factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + interface laqz0 +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine claqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, & + alpha, beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: wants,wantq,wantz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,rec + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + complex(sp), intent(inout) :: alpha(*),beta(*),work(*) + real(sp), intent(out) :: rwork(*) + end subroutine claqz0 +#else + module procedure stdlib_claqz0 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine dlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, & + alphar, alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: wants,wantq,wantz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,rec + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + real(dp), intent(inout) :: alphar(*),alphai(*),beta(*),work(*) + end subroutine dlaqz0 +#else + module procedure stdlib_dlaqz0 +#endif +#:if WITH_QP + module procedure stdlib_qlaqz0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine slaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, & + alphar, alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: wants,wantq,wantz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,rec + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + real(sp), intent(inout) :: alphar(*),alphai(*),beta(*),work(*) + end subroutine slaqz0 +#else + module procedure stdlib_slaqz0 +#endif +#:if WITH_QP + module procedure stdlib_wlaqz0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine zlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, & + alpha, beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: wants,wantq,wantz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,rec + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + complex(dp), intent(inout) :: alpha(*),beta(*),work(*) + real(dp), intent(out) :: rwork(*) + end subroutine zlaqz0 +#else + module procedure stdlib_zlaqz0 +#endif + end interface laqz0 + + + + !> LAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position + interface laqz1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, & + qstart, q, ldq, nz, zstart, z, ldz ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ilq,ilz + integer(ilp), intent(in) :: k,lda,ldb,ldq,ldz,istartm,istopm,nq,nz,qstart,& + zstart,ihi + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine claqz1 +#else + module procedure stdlib_claqz1 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb + real(dp), intent(in) :: a(lda,*),b(ldb,*),sr1,sr2,si,beta1,beta2 + real(dp), intent(out) :: v(*) + end subroutine dlaqz1 +#else + module procedure stdlib_dlaqz1 +#endif +#:if WITH_QP + module procedure stdlib_qlaqz1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb + real(sp), intent(in) :: a(lda,*),b(ldb,*),sr1,sr2,si,beta1,beta2 + real(sp), intent(out) :: v(*) + end subroutine slaqz1 +#else + module procedure stdlib_slaqz1 +#endif +#:if WITH_QP + module procedure stdlib_wlaqz1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, & + qstart, q, ldq, nz, zstart, z, ldz ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ilq,ilz + integer(ilp), intent(in) :: k,lda,ldb,ldq,ldz,istartm,istopm,nq,nz,qstart,& + zstart,ihi + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine zlaqz1 +#else + module procedure stdlib_zlaqz1 +#endif + end interface laqz1 + + + + !> LAQZ4: Executes a single multishift QZ sweep + interface laqz4 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr,& + si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ilschur,ilq,ilz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,nshifts,& + nblock_desired,ldqc,ldzc + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*),qc(ldqc,*),zc(& + ldzc,*) + real(dp), intent(inout) :: work(*) + real(dp), intent(inout) :: sr(*),si(*),ss(*) + integer(ilp), intent(out) :: info + end subroutine dlaqz4 +#else + module procedure stdlib_dlaqz4 +#endif +#:if WITH_QP + module procedure stdlib_qlaqz4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr,& + si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ilschur,ilq,ilz + integer(ilp), intent(in) :: n,ilo,ihi,lda,ldb,ldq,ldz,lwork,nshifts,& + nblock_desired,ldqc,ldzc + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*),qc(ldqc,*),zc(& + ldzc,*) + real(sp), intent(inout) :: work(*) + real(sp), intent(inout) :: sr(*),si(*),ss(*) + integer(ilp), intent(out) :: info + end subroutine slaqz4 +#else + module procedure stdlib_slaqz4 +#endif + end interface laqz4 + + + + !> LAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + interface lar1v +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc,& + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1,bn,n + integer(ilp), intent(out) :: negcnt,isuppz(*) + integer(ilp), intent(inout) :: r + real(sp), intent(in) :: gaptol,lambda,pivmin,d(*),l(*),ld(*),lld(*) + real(sp), intent(out) :: mingma,nrminv,resid,rqcorr,ztz,work(*) + complex(sp), intent(inout) :: z(*) + end subroutine clar1v +#else + module procedure stdlib_clar1v +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc,& + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1,bn,n + integer(ilp), intent(out) :: negcnt,isuppz(*) + integer(ilp), intent(inout) :: r + real(dp), intent(in) :: gaptol,lambda,pivmin,d(*),l(*),ld(*),lld(*) + real(dp), intent(out) :: mingma,nrminv,resid,rqcorr,ztz,work(*) + real(dp), intent(inout) :: z(*) + end subroutine dlar1v +#else + module procedure stdlib_dlar1v +#endif +#:if WITH_QP + module procedure stdlib_qlar1v +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc,& + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1,bn,n + integer(ilp), intent(out) :: negcnt,isuppz(*) + integer(ilp), intent(inout) :: r + real(sp), intent(in) :: gaptol,lambda,pivmin,d(*),l(*),ld(*),lld(*) + real(sp), intent(out) :: mingma,nrminv,resid,rqcorr,ztz,work(*) + real(sp), intent(inout) :: z(*) + end subroutine slar1v +#else + module procedure stdlib_slar1v +#endif +#:if WITH_QP + module procedure stdlib_wlar1v +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc,& + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1,bn,n + integer(ilp), intent(out) :: negcnt,isuppz(*) + integer(ilp), intent(inout) :: r + real(dp), intent(in) :: gaptol,lambda,pivmin,d(*),l(*),ld(*),lld(*) + real(dp), intent(out) :: mingma,nrminv,resid,rqcorr,ztz,work(*) + complex(dp), intent(inout) :: z(*) + end subroutine zlar1v +#else + module procedure stdlib_zlar1v +#endif + end interface lar1v + + + + !> LAR2V: applies a vector of complex plane rotations with real cosines + !> from both sides to a sequence of 2-by-2 complex Hermitian matrices, + !> defined by the elements of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := + !> ( conjg(z(i)) y(i) ) + !> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) + !> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) + interface lar2v +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clar2v( n, x, y, z, incx, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,n + real(sp), intent(in) :: c(*) + complex(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: x(*),y(*),z(*) + end subroutine clar2v +#else + module procedure stdlib_clar2v +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlar2v( n, x, y, z, incx, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,n + real(dp), intent(in) :: c(*),s(*) + real(dp), intent(inout) :: x(*),y(*),z(*) + end subroutine dlar2v +#else + module procedure stdlib_dlar2v +#endif +#:if WITH_QP + module procedure stdlib_qlar2v +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slar2v( n, x, y, z, incx, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,n + real(sp), intent(in) :: c(*),s(*) + real(sp), intent(inout) :: x(*),y(*),z(*) + end subroutine slar2v +#else + module procedure stdlib_slar2v +#endif +#:if WITH_QP + module procedure stdlib_wlar2v +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlar2v( n, x, y, z, incx, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,n + real(dp), intent(in) :: c(*) + complex(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: x(*),y(*),z(*) + end subroutine zlar2v +#else + module procedure stdlib_zlar2v +#endif + end interface lar2v + + + + !> LARCM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by M and real; B is M by N and complex; + !> C is M by N and complex. + interface larcm +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarcm( m, n, a, lda, b, ldb, c, ldc, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: b(ldb,*) + complex(sp), intent(out) :: c(ldc,*) + end subroutine clarcm +#else + module procedure stdlib_clarcm +#endif +#:if WITH_QP + module procedure stdlib_wlarcm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarcm( m, n, a, lda, b, ldb, c, ldc, rwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: lda,ldb,ldc,m,n + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: b(ldb,*) + complex(dp), intent(out) :: c(ldc,*) + end subroutine zlarcm +#else + module procedure stdlib_zlarcm +#endif + end interface larcm + + + + !> LARF: applies a complex elementary reflector H to a complex M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + interface larf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarf( side, m, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,ldc,m,n + complex(sp), intent(in) :: tau,v(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine clarf +#else + module procedure stdlib_clarf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarf( side, m, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,ldc,m,n + real(dp), intent(in) :: tau,v(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dlarf +#else + module procedure stdlib_dlarf +#endif +#:if WITH_QP + module procedure stdlib_qlarf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarf( side, m, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,ldc,m,n + real(sp), intent(in) :: tau,v(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine slarf +#else + module procedure stdlib_slarf +#endif +#:if WITH_QP + module procedure stdlib_wlarf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarf( side, m, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,ldc,m,n + complex(dp), intent(in) :: tau,v(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zlarf +#else + module procedure stdlib_zlarf +#endif + end interface larf + + + + !> LARFB: applies a complex block reflector H or its transpose H**H to a + !> complex M-by-N matrix C, from either the left or the right. + interface larfb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,ldc,ldt,ldv,ldwork,m,n + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: t(ldt,*),v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + end subroutine clarfb +#else + module procedure stdlib_clarfb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,ldc,ldt,ldv,ldwork,m,n + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: t(ldt,*),v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + end subroutine dlarfb +#else + module procedure stdlib_dlarfb +#endif +#:if WITH_QP + module procedure stdlib_qlarfb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,ldc,ldt,ldv,ldwork,m,n + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: t(ldt,*),v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + end subroutine slarfb +#else + module procedure stdlib_slarfb +#endif +#:if WITH_QP + module procedure stdlib_wlarfb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,ldc,ldt,ldv,ldwork,m,n + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: t(ldt,*),v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + end subroutine zlarfb +#else + module procedure stdlib_zlarfb +#endif + end interface larfb + + + + !> LARFB_GETT: applies a complex Householder block reflector H from the + !> left to a complex (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + interface larfb_gett +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: ident + integer(ilp), intent(in) :: k,lda,ldb,ldt,ldwork,m,n + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(ldwork,*) + end subroutine clarfb_gett +#else + module procedure stdlib_clarfb_gett +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: ident + integer(ilp), intent(in) :: k,lda,ldb,ldt,ldwork,m,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(ldwork,*) + end subroutine dlarfb_gett +#else + module procedure stdlib_dlarfb_gett +#endif +#:if WITH_QP + module procedure stdlib_qlarfb_gett +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: ident + integer(ilp), intent(in) :: k,lda,ldb,ldt,ldwork,m,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(ldwork,*) + end subroutine slarfb_gett +#else + module procedure stdlib_slarfb_gett +#endif +#:if WITH_QP + module procedure stdlib_wlarfb_gett +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: ident + integer(ilp), intent(in) :: k,lda,ldb,ldt,ldwork,m,n + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(ldwork,*) + end subroutine zlarfb_gett +#else + module procedure stdlib_zlarfb_gett +#endif + end interface larfb_gett + + + + !> LARFG: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, with beta real, and x is an + !> (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + !> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + interface larfg +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarfg( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(sp), intent(inout) :: alpha,x(*) + complex(sp), intent(out) :: tau + end subroutine clarfg +#else + module procedure stdlib_clarfg +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarfg( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(inout) :: alpha,x(*) + real(dp), intent(out) :: tau + end subroutine dlarfg +#else + module procedure stdlib_dlarfg +#endif +#:if WITH_QP + module procedure stdlib_qlarfg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarfg( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(inout) :: alpha,x(*) + real(sp), intent(out) :: tau + end subroutine slarfg +#else + module procedure stdlib_slarfg +#endif +#:if WITH_QP + module procedure stdlib_wlarfg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarfg( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(dp), intent(inout) :: alpha,x(*) + complex(dp), intent(out) :: tau + end subroutine zlarfg +#else + module procedure stdlib_zlarfg +#endif + end interface larfg + + + + !> LARFGP: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is real and non-negative, and + !> x is an (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + interface larfgp +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine clarfgp( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(sp), intent(inout) :: alpha,x(*) + complex(sp), intent(out) :: tau + end subroutine clarfgp +#else + module procedure stdlib_clarfgp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dlarfgp( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(inout) :: alpha,x(*) + real(dp), intent(out) :: tau + end subroutine dlarfgp +#else + module procedure stdlib_dlarfgp +#endif +#:if WITH_QP + module procedure stdlib_qlarfgp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine slarfgp( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(inout) :: alpha,x(*) + real(sp), intent(out) :: tau + end subroutine slarfgp +#else + module procedure stdlib_slarfgp +#endif +#:if WITH_QP + module procedure stdlib_wlarfgp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zlarfgp( n, alpha, x, incx, tau ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + complex(dp), intent(inout) :: alpha,x(*) + complex(dp), intent(out) :: tau + end subroutine zlarfgp +#else + module procedure stdlib_zlarfgp +#endif + end interface larfgp + + + + !> LARFT: forms the triangular factor T of a complex block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + interface larft +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + complex(sp), intent(out) :: t(ldt,*) + complex(sp), intent(in) :: tau(*),v(ldv,*) + end subroutine clarft +#else + module procedure stdlib_clarft +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + real(dp), intent(out) :: t(ldt,*) + real(dp), intent(in) :: tau(*),v(ldv,*) + end subroutine dlarft +#else + module procedure stdlib_dlarft +#endif +#:if WITH_QP + module procedure stdlib_qlarft +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + real(sp), intent(out) :: t(ldt,*) + real(sp), intent(in) :: tau(*),v(ldv,*) + end subroutine slarft +#else + module procedure stdlib_slarft +#endif +#:if WITH_QP + module procedure stdlib_wlarft +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + complex(dp), intent(out) :: t(ldt,*) + complex(dp), intent(in) :: tau(*),v(ldv,*) + end subroutine zlarft +#else + module procedure stdlib_zlarft +#endif + end interface larft + + + + !> LARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n Hermitian matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + interface larfy +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarfy( uplo, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv,ldc,n + complex(sp), intent(in) :: tau,v(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine clarfy +#else + module procedure stdlib_clarfy +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarfy( uplo, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv,ldc,n + real(dp), intent(in) :: tau,v(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dlarfy +#else + module procedure stdlib_dlarfy +#endif +#:if WITH_QP + module procedure stdlib_qlarfy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarfy( uplo, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv,ldc,n + real(sp), intent(in) :: tau,v(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine slarfy +#else + module procedure stdlib_slarfy +#endif +#:if WITH_QP + module procedure stdlib_wlarfy +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarfy( uplo, n, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv,ldc,n + complex(dp), intent(in) :: tau,v(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zlarfy +#else + module procedure stdlib_zlarfy +#endif + end interface larfy + + + + !> LARGV: generates a vector of complex plane rotations with real + !> cosines, determined by elements of the complex vectors x and y. + !> For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) + !> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) + !> where c(i)**2 + ABS(s(i))**2 = 1 + !> The following conventions are used (these are the same as in CLARTG, + !> but differ from the BLAS1 routine CROTG): + !> If y(i)=0, then c(i)=1 and s(i)=0. + !> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. + interface largv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clargv( n, x, incx, y, incy, c, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(sp), intent(out) :: c(*) + complex(sp), intent(inout) :: x(*),y(*) + end subroutine clargv +#else + module procedure stdlib_clargv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlargv( n, x, incx, y, incy, c, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(dp), intent(out) :: c(*) + real(dp), intent(inout) :: x(*),y(*) + end subroutine dlargv +#else + module procedure stdlib_dlargv +#endif +#:if WITH_QP + module procedure stdlib_qlargv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slargv( n, x, incx, y, incy, c, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(sp), intent(out) :: c(*) + real(sp), intent(inout) :: x(*),y(*) + end subroutine slargv +#else + module procedure stdlib_slargv +#endif +#:if WITH_QP + module procedure stdlib_wlargv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlargv( n, x, incx, y, incy, c, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(dp), intent(out) :: c(*) + complex(dp), intent(inout) :: x(*),y(*) + end subroutine zlargv +#else + module procedure stdlib_zlargv +#endif + end interface largv + + + + !> LARNV: returns a vector of n random complex numbers from a uniform or + !> normal distribution. + interface larnv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarnv( idist, iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: idist,n + integer(ilp), intent(inout) :: iseed(4) + complex(sp), intent(out) :: x(*) + end subroutine clarnv +#else + module procedure stdlib_clarnv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarnv( idist, iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: idist,n + integer(ilp), intent(inout) :: iseed(4) + real(dp), intent(out) :: x(*) + end subroutine dlarnv +#else + module procedure stdlib_dlarnv +#endif +#:if WITH_QP + module procedure stdlib_qlarnv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarnv( idist, iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: idist,n + integer(ilp), intent(inout) :: iseed(4) + real(sp), intent(out) :: x(*) + end subroutine slarnv +#else + module procedure stdlib_slarnv +#endif +#:if WITH_QP + module procedure stdlib_wlarnv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarnv( idist, iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: idist,n + integer(ilp), intent(inout) :: iseed(4) + complex(dp), intent(out) :: x(*) + end subroutine zlarnv +#else + module procedure stdlib_zlarnv +#endif + end interface larnv + + + + !> Compute the splitting points with threshold SPLTOL. + !> LARRA: sets any "small" off-diagonal elements to zero. + interface larra +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarra( n, d, e, e2, spltol, tnrm,nsplit, isplit, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,nsplit,isplit(*) + integer(ilp), intent(in) :: n + real(dp), intent(in) :: spltol,tnrm,d(*) + real(dp), intent(inout) :: e(*),e2(*) + end subroutine dlarra +#else + module procedure stdlib_dlarra +#endif +#:if WITH_QP + module procedure stdlib_qlarra +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarra( n, d, e, e2, spltol, tnrm,nsplit, isplit, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,nsplit,isplit(*) + integer(ilp), intent(in) :: n + real(sp), intent(in) :: spltol,tnrm,d(*) + real(sp), intent(inout) :: e(*),e2(*) + end subroutine slarra +#else + module procedure stdlib_slarra +#endif + end interface larra + + + + !> Given the relatively robust representation(RRR) L D L^T, LARRB: + !> does "limited" bisection to refine the eigenvalues of L D L^T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses and their gaps are input in WERR + !> and WGAP, respectively. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + interface larrb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrb( n, d, lld, ifirst, ilast, rtol1,rtol2, offset, w, wgap, & + werr, work, iwork,pivmin, spdiam, twist, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ifirst,ilast,n,offset,twist + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: pivmin,rtol1,rtol2,spdiam,d(*),lld(*) + real(dp), intent(inout) :: w(*),werr(*),wgap(*) + real(dp), intent(out) :: work(*) + end subroutine dlarrb +#else + module procedure stdlib_dlarrb +#endif +#:if WITH_QP + module procedure stdlib_qlarrb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrb( n, d, lld, ifirst, ilast, rtol1,rtol2, offset, w, wgap, & + werr, work, iwork,pivmin, spdiam, twist, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ifirst,ilast,n,offset,twist + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: pivmin,rtol1,rtol2,spdiam,d(*),lld(*) + real(sp), intent(inout) :: w(*),werr(*),wgap(*) + real(sp), intent(out) :: work(*) + end subroutine slarrb +#else + module procedure stdlib_slarrb +#endif + end interface larrb + + + + !> Find the number of eigenvalues of the symmetric tridiagonal matrix T + !> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T + !> if JOBT = 'L'. + interface larrc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrc( jobt, n, vl, vu, d, e, pivmin,eigcnt, lcnt, rcnt, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobt + integer(ilp), intent(out) :: eigcnt,info,lcnt,rcnt + integer(ilp), intent(in) :: n + real(dp), intent(in) :: pivmin,vl,vu,d(*),e(*) + end subroutine dlarrc +#else + module procedure stdlib_dlarrc +#endif +#:if WITH_QP + module procedure stdlib_qlarrc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrc( jobt, n, vl, vu, d, e, pivmin,eigcnt, lcnt, rcnt, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobt + integer(ilp), intent(out) :: eigcnt,info,lcnt,rcnt + integer(ilp), intent(in) :: n + real(sp), intent(in) :: pivmin,vl,vu,d(*),e(*) + end subroutine slarrc +#else + module procedure stdlib_slarrc +#endif + end interface larrc + + + + !> LARRD: computes the eigenvalues of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + interface larrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrd( range, order, n, vl, vu, il, iu, gers,reltol, d, e, e2, & + pivmin, nsplit, isplit,m, w, werr, wl, wu, iblock, indexw,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: order,range + integer(ilp), intent(in) :: il,iu,n,nsplit,isplit(*) + integer(ilp), intent(out) :: info,m,iblock(*),indexw(*),iwork(*) + real(dp), intent(in) :: pivmin,reltol,vl,vu,d(*),e(*),e2(*),gers(*) + real(dp), intent(out) :: wl,wu,w(*),werr(*),work(*) + end subroutine dlarrd +#else + module procedure stdlib_dlarrd +#endif +#:if WITH_QP + module procedure stdlib_qlarrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrd( range, order, n, vl, vu, il, iu, gers,reltol, d, e, e2, & + pivmin, nsplit, isplit,m, w, werr, wl, wu, iblock, indexw,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: order,range + integer(ilp), intent(in) :: il,iu,n,nsplit,isplit(*) + integer(ilp), intent(out) :: info,m,iblock(*),indexw(*),iwork(*) + real(sp), intent(in) :: pivmin,reltol,vl,vu,d(*),e(*),e2(*),gers(*) + real(sp), intent(out) :: wl,wu,w(*),werr(*),work(*) + end subroutine slarrd +#else + module procedure stdlib_slarrd +#endif + end interface larrd + + + + !> To find the desired eigenvalues of a given real symmetric + !> tridiagonal matrix T, LARRE: sets any "small" off-diagonal + !> elements to zero, and for each unreduced block T_i, it finds + !> (a) a suitable shift at one end of the block's spectrum, + !> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and + !> (c) eigenvalues of each L_i D_i L_i^T. + !> The representations and eigenvalues found are then used by + !> DSTEMR to compute the eigenvectors of T. + !> The accuracy varies depending on whether bisection is used to + !> find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to + !> conpute all and then discard any unwanted one. + !> As an added benefit, LARRE also outputs the n + !> Gerschgorin intervals for the matrices L_i D_i L_i^T. + interface larre +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarre( range, n, vl, vu, il, iu, d, e, e2,rtol1, rtol2, spltol, & + nsplit, isplit, m,w, werr, wgap, iblock, indexw, gers, pivmin,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: range + integer(ilp), intent(in) :: il,iu,n + integer(ilp), intent(out) :: info,m,nsplit,iblock(*),isplit(*),iwork(*),& + indexw(*) + real(dp), intent(out) :: pivmin,gers(*),w(*),werr(*),wgap(*),work(*) + real(dp), intent(in) :: rtol1,rtol2,spltol + real(dp), intent(inout) :: vl,vu,d(*),e(*),e2(*) + end subroutine dlarre +#else + module procedure stdlib_dlarre +#endif +#:if WITH_QP + module procedure stdlib_qlarre +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarre( range, n, vl, vu, il, iu, d, e, e2,rtol1, rtol2, spltol, & + nsplit, isplit, m,w, werr, wgap, iblock, indexw, gers, pivmin,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: range + integer(ilp), intent(in) :: il,iu,n + integer(ilp), intent(out) :: info,m,nsplit,iblock(*),isplit(*),iwork(*),& + indexw(*) + real(sp), intent(out) :: pivmin,gers(*),w(*),werr(*),wgap(*),work(*) + real(sp), intent(in) :: rtol1,rtol2,spltol + real(sp), intent(inout) :: vl,vu,d(*),e(*),e2(*) + end subroutine slarre +#else + module procedure stdlib_slarre +#endif + end interface larre + + + + !> Given the initial representation L D L^T and its cluster of close + !> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... + !> W( CLEND ), LARRF: finds a new relatively robust representation + !> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the + !> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. + interface larrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrf( n, d, l, ld, clstrt, clend,w, wgap, werr,spdiam, clgapl, & + clgapr, pivmin, sigma,dplus, lplus, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: clstrt,clend,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: clgapl,clgapr,pivmin,spdiam,d(*),l(*),ld(*),w(*),werr(& + *) + real(dp), intent(out) :: sigma,dplus(*),lplus(*),work(*) + real(dp), intent(inout) :: wgap(*) + end subroutine dlarrf +#else + module procedure stdlib_dlarrf +#endif +#:if WITH_QP + module procedure stdlib_qlarrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrf( n, d, l, ld, clstrt, clend,w, wgap, werr,spdiam, clgapl, & + clgapr, pivmin, sigma,dplus, lplus, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: clstrt,clend,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: clgapl,clgapr,pivmin,spdiam,d(*),l(*),ld(*),w(*),werr(& + *) + real(sp), intent(out) :: sigma,dplus(*),lplus(*),work(*) + real(sp), intent(inout) :: wgap(*) + end subroutine slarrf +#else + module procedure stdlib_slarrf +#endif + end interface larrf + + + + !> Given the initial eigenvalue approximations of T, LARRJ: + !> does bisection to refine the eigenvalues of T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses in WERR. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + interface larrj +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrj( n, d, e2, ifirst, ilast,rtol, offset, w, werr, work, iwork,& + pivmin, spdiam, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ifirst,ilast,n,offset + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(in) :: pivmin,rtol,spdiam,d(*),e2(*) + real(dp), intent(inout) :: w(*),werr(*) + real(dp), intent(out) :: work(*) + end subroutine dlarrj +#else + module procedure stdlib_dlarrj +#endif +#:if WITH_QP + module procedure stdlib_qlarrj +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrj( n, d, e2, ifirst, ilast,rtol, offset, w, werr, work, iwork,& + pivmin, spdiam, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ifirst,ilast,n,offset + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(in) :: pivmin,rtol,spdiam,d(*),e2(*) + real(sp), intent(inout) :: w(*),werr(*) + real(sp), intent(out) :: work(*) + end subroutine slarrj +#else + module procedure stdlib_slarrj +#endif + end interface larrj + + + + !> LARRK: computes one eigenvalue of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + interface larrk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrk( n, iw, gl, gu,d, e2, pivmin, reltol, w, werr, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: iw,n + real(dp), intent(in) :: pivmin,reltol,gl,gu,d(*),e2(*) + real(dp), intent(out) :: w,werr + end subroutine dlarrk +#else + module procedure stdlib_dlarrk +#endif +#:if WITH_QP + module procedure stdlib_qlarrk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrk( n, iw, gl, gu,d, e2, pivmin, reltol, w, werr, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: iw,n + real(sp), intent(in) :: pivmin,reltol,gl,gu,d(*),e2(*) + real(sp), intent(out) :: w,werr + end subroutine slarrk +#else + module procedure stdlib_slarrk +#endif + end interface larrk + + + + !> Perform tests to decide whether the symmetric tridiagonal matrix T + !> warrants expensive computations which guarantee high relative accuracy + !> in the eigenvalues. + interface larrr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrr( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: d(*) + real(dp), intent(inout) :: e(*) + end subroutine dlarrr +#else + module procedure stdlib_dlarrr +#endif +#:if WITH_QP + module procedure stdlib_qlarrr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrr( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: d(*) + real(sp), intent(inout) :: e(*) + end subroutine slarrr +#else + module procedure stdlib_slarrr +#endif + end interface larrr + + + + !> LARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by SLARRE. + interface larrv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: dol,dou,ldz,m,n,iblock(*),indexw(*),isplit(*) + + integer(ilp), intent(out) :: info,isuppz(*),iwork(*) + real(sp), intent(in) :: minrgp,pivmin,vl,vu,gers(*) + real(sp), intent(inout) :: rtol1,rtol2,d(*),l(*),w(*),werr(*),wgap(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(out) :: z(ldz,*) + end subroutine clarrv +#else + module procedure stdlib_clarrv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: dol,dou,ldz,m,n,iblock(*),indexw(*),isplit(*) + + integer(ilp), intent(out) :: info,isuppz(*),iwork(*) + real(dp), intent(in) :: minrgp,pivmin,vl,vu,gers(*) + real(dp), intent(inout) :: rtol1,rtol2,d(*),l(*),w(*),werr(*),wgap(*) + real(dp), intent(out) :: work(*),z(ldz,*) + end subroutine dlarrv +#else + module procedure stdlib_dlarrv +#endif +#:if WITH_QP + module procedure stdlib_qlarrv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: dol,dou,ldz,m,n,iblock(*),indexw(*),isplit(*) + + integer(ilp), intent(out) :: info,isuppz(*),iwork(*) + real(sp), intent(in) :: minrgp,pivmin,vl,vu,gers(*) + real(sp), intent(inout) :: rtol1,rtol2,d(*),l(*),w(*),werr(*),wgap(*) + real(sp), intent(out) :: work(*),z(ldz,*) + end subroutine slarrv +#else + module procedure stdlib_slarrv +#endif +#:if WITH_QP + module procedure stdlib_wlarrv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: dol,dou,ldz,m,n,iblock(*),indexw(*),isplit(*) + + integer(ilp), intent(out) :: info,isuppz(*),iwork(*) + real(dp), intent(in) :: minrgp,pivmin,vl,vu,gers(*) + real(dp), intent(inout) :: rtol1,rtol2,d(*),l(*),w(*),werr(*),wgap(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(out) :: z(ldz,*) + end subroutine zlarrv +#else + module procedure stdlib_zlarrv +#endif + end interface larrv + + + + !> ! + !> + !> LARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -conjg(S) C ] [ G ] [ 0 ] + !> where C is real and C**2 + |S|**2 = 1. + !> The mathematical formulas used for C and S are + !> sgn(x) = { x / |x|, x != 0 + !> { 1, x = 0 + !> R = sgn(F) * sqrt(|F|**2 + |G|**2) + !> C = |F| / sqrt(|F|**2 + |G|**2) + !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) + !> When F and G are real, the formulas simplify to C = F/R and + !> S = G/R, and the returned values of C, S, and R should be + !> identical to those returned by LARTG. + !> The algorithm used to compute these quantities incorporates scaling + !> to avoid overflow or underflow in computing the square root of the + !> sum of squares. + !> This is a faster version of the BLAS1 routine CROTG, except for + !> the following differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0, then C=0 and S is chosen so that R is real. + !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. + interface lartg +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clartg( f, g, c, s, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(out) :: c + complex(sp), intent(in) :: f,g + complex(sp), intent(out) :: r,s + end subroutine clartg +#else + module procedure stdlib_clartg +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlartg( f, g, c, s, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(out) :: c,r,s + real(dp), intent(in) :: f,g + end subroutine dlartg +#else + module procedure stdlib_dlartg +#endif +#:if WITH_QP + module procedure stdlib_qlartg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slartg( f, g, c, s, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(out) :: c,r,s + real(sp), intent(in) :: f,g + end subroutine slartg +#else + module procedure stdlib_slartg +#endif +#:if WITH_QP + module procedure stdlib_wlartg +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlartg( f, g, c, s, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(out) :: c + complex(dp), intent(in) :: f,g + complex(dp), intent(out) :: r,s + end subroutine zlartg +#else + module procedure stdlib_zlartg +#endif + end interface lartg + + + + !> LARTGP: generates a plane rotation so that + !> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + !> [ -SN CS ] [ G ] [ 0 ] + !> This is a slower, more accurate version of the Level 1 BLAS routine DROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then CS=(+/-)1 and SN=0. + !> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. + !> The sign is chosen so that R >= 0. + interface lartgp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlartgp( f, g, cs, sn, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(out) :: cs,r,sn + real(dp), intent(in) :: f,g + end subroutine dlartgp +#else + module procedure stdlib_dlartgp +#endif +#:if WITH_QP + module procedure stdlib_qlartgp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slartgp( f, g, cs, sn, r ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(out) :: cs,r,sn + real(sp), intent(in) :: f,g + end subroutine slartgp +#else + module procedure stdlib_slartgp +#endif + end interface lartgp + + + + !> LARTGS: generates a plane rotation designed to introduce a bulge in + !> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD + !> problem. X and Y are the top-row entries, and SIGMA is the shift. + !> The computed CS and SN define a plane rotation satisfying + !> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], + !> [ -SN CS ] [ X * Y ] [ 0 ] + !> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the + !> rotation is by PI/2. + interface lartgs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlartgs( x, y, sigma, cs, sn ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(out) :: cs,sn + real(dp), intent(in) :: sigma,x,y + end subroutine dlartgs +#else + module procedure stdlib_dlartgs +#endif +#:if WITH_QP + module procedure stdlib_qlartgs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slartgs( x, y, sigma, cs, sn ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(out) :: cs,sn + real(sp), intent(in) :: sigma,x,y + end subroutine slartgs +#else + module procedure stdlib_slartgs +#endif + end interface lartgs + + + + !> LARTV: applies a vector of complex plane rotations with real cosines + !> to elements of the complex vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) + interface lartv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clartv( n, x, incx, y, incy, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(sp), intent(in) :: c(*) + complex(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: x(*),y(*) + end subroutine clartv +#else + module procedure stdlib_clartv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlartv( n, x, incx, y, incy, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(dp), intent(in) :: c(*),s(*) + real(dp), intent(inout) :: x(*),y(*) + end subroutine dlartv +#else + module procedure stdlib_dlartv +#endif +#:if WITH_QP + module procedure stdlib_qlartv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slartv( n, x, incx, y, incy, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(sp), intent(in) :: c(*),s(*) + real(sp), intent(inout) :: x(*),y(*) + end subroutine slartv +#else + module procedure stdlib_slartv +#endif +#:if WITH_QP + module procedure stdlib_wlartv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlartv( n, x, incx, y, incy, c, s, incc ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incc,incx,incy,n + real(dp), intent(in) :: c(*) + complex(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: x(*),y(*) + end subroutine zlartv +#else + module procedure stdlib_zlartv +#endif + end interface lartv + + + + !> LARUV: returns a vector of n random real numbers from a uniform (0,1) + !> distribution (n <= 128). + !> This is an auxiliary routine called by DLARNV and ZLARNV. + interface laruv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaruv( iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + integer(ilp), intent(inout) :: iseed(4) + real(dp), intent(out) :: x(n) + end subroutine dlaruv +#else + module procedure stdlib_dlaruv +#endif +#:if WITH_QP + module procedure stdlib_qlaruv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaruv( iseed, n, x ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: n + integer(ilp), intent(inout) :: iseed(4) + real(sp), intent(out) :: x(n) + end subroutine slaruv +#else + module procedure stdlib_slaruv +#endif + end interface laruv + + + + !> LARZ: applies a complex elementary reflector H to a complex + !> M-by-N matrix C, from either the left or the right. H is represented + !> in the form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + !> H is a product of k elementary reflectors as returned by CTZRZF. + interface larz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarz( side, m, n, l, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,l,ldc,m,n + complex(sp), intent(in) :: tau,v(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + end subroutine clarz +#else + module procedure stdlib_clarz +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,l,ldc,m,n + real(dp), intent(in) :: tau,v(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + end subroutine dlarz +#else + module procedure stdlib_dlarz +#endif +#:if WITH_QP + module procedure stdlib_qlarz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarz( side, m, n, l, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,l,ldc,m,n + real(sp), intent(in) :: tau,v(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + end subroutine slarz +#else + module procedure stdlib_slarz +#endif +#:if WITH_QP + module procedure stdlib_wlarz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side + integer(ilp), intent(in) :: incv,l,ldc,m,n + complex(dp), intent(in) :: tau,v(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + end subroutine zlarz +#else + module procedure stdlib_zlarz +#endif + end interface larz + + + + !> LARZB: applies a complex block reflector H or its transpose H**H + !> to a complex distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + interface larzb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,ldc,ldt,ldv,ldwork,m,n + complex(sp), intent(inout) :: c(ldc,*),t(ldt,*),v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + end subroutine clarzb +#else + module procedure stdlib_clarzb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,ldc,ldt,ldv,ldwork,m,n + real(dp), intent(inout) :: c(ldc,*),t(ldt,*),v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + end subroutine dlarzb +#else + module procedure stdlib_dlarzb +#endif +#:if WITH_QP + module procedure stdlib_qlarzb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,ldc,ldt,ldv,ldwork,m,n + real(sp), intent(inout) :: c(ldc,*),t(ldt,*),v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + end subroutine slarzb +#else + module procedure stdlib_slarzb +#endif +#:if WITH_QP + module procedure stdlib_wlarzb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,ldc,ldt,ldv,ldwork,m,n + complex(dp), intent(inout) :: c(ldc,*),t(ldt,*),v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + end subroutine zlarzb +#else + module procedure stdlib_zlarzb +#endif + end interface larzb + + + + !> LARZT: forms the triangular factor T of a complex block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + interface larzt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + complex(sp), intent(out) :: t(ldt,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(inout) :: v(ldv,*) + end subroutine clarzt +#else + module procedure stdlib_clarzt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + real(dp), intent(out) :: t(ldt,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(inout) :: v(ldv,*) + end subroutine dlarzt +#else + module procedure stdlib_dlarzt +#endif +#:if WITH_QP + module procedure stdlib_qlarzt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + real(sp), intent(out) :: t(ldt,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(inout) :: v(ldv,*) + end subroutine slarzt +#else + module procedure stdlib_slarzt +#endif +#:if WITH_QP + module procedure stdlib_wlarzt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,storev + integer(ilp), intent(in) :: k,ldt,ldv,n + complex(dp), intent(out) :: t(ldt,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(inout) :: v(ldv,*) + end subroutine zlarzt +#else + module procedure stdlib_zlarzt +#endif + end interface larzt + + + + !> LASCL: multiplies the M by N complex matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + interface lascl +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,lda,m,n + real(sp), intent(in) :: cfrom,cto + complex(sp), intent(inout) :: a(lda,*) + end subroutine clascl +#else + module procedure stdlib_clascl +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,lda,m,n + real(dp), intent(in) :: cfrom,cto + real(dp), intent(inout) :: a(lda,*) + end subroutine dlascl +#else + module procedure stdlib_dlascl +#endif +#:if WITH_QP + module procedure stdlib_qlascl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,lda,m,n + real(sp), intent(in) :: cfrom,cto + real(sp), intent(inout) :: a(lda,*) + end subroutine slascl +#else + module procedure stdlib_slascl +#endif +#:if WITH_QP + module procedure stdlib_wlascl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl,ku,lda,m,n + real(dp), intent(in) :: cfrom,cto + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlascl +#else + module procedure stdlib_zlascl +#endif + end interface lascl + + + + !> Using a divide and conquer approach, LASD0: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M + !> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + !> The algorithm computes orthogonal matrices U and VT such that + !> B = U * S * VT. The singular values S are overwritten on D. + !> A related subroutine, DLASDA, computes only the singular values, + !> and optionally, the singular vectors in compact form. + interface lasd0 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,n,smlsiz,sqre + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine dlasd0 +#else + module procedure stdlib_dlasd0 +#endif +#:if WITH_QP + module procedure stdlib_qlasd0 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,n,smlsiz,sqre + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: u(ldu,*),vt(ldvt,*),work(*) + end subroutine slasd0 +#else + module procedure stdlib_slasd0 +#endif + end interface lasd0 + + + + !> LASD1: computes the SVD of an upper bidiagonal N-by-M matrix B, + !> where N = NL + NR + 1 and M = N + SQRE. LASD1 is called from DLASD0. + !> A related subroutine DLASD7 handles the case in which the singular + !> values (and the singular vectors in factored form) are desired. + !> LASD1 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The left singular vectors of the original matrix are stored in U, and + !> the transpose of the right singular vectors are stored in VT, and the + !> singular values are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or when there are zeros in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD2. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the square roots of the + !> roots of the secular equation via the routine DLASD4 (as called + !> by DLASD3). This routine also calculates the singular vectors of + !> the current problem. + !> The final stage consists of computing the updated singular vectors + !> directly using the updated singular values. The singular vectors + !> for the current problem are multiplied with the singular vectors + !> from the overall problem. + interface lasd1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork,& + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,nl,nr,sqre + real(dp), intent(inout) :: alpha,beta,d(*),u(ldu,*),vt(ldvt,*) + integer(ilp), intent(inout) :: idxq(*) + real(dp), intent(out) :: work(*) + end subroutine dlasd1 +#else + module procedure stdlib_dlasd1 +#endif +#:if WITH_QP + module procedure stdlib_qlasd1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork,& + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu,ldvt,nl,nr,sqre + real(sp), intent(inout) :: alpha,beta,d(*),u(ldu,*),vt(ldvt,*) + integer(ilp), intent(inout) :: idxq(*) + real(sp), intent(out) :: work(*) + end subroutine slasd1 +#else + module procedure stdlib_slasd1 +#endif + end interface lasd1 + + + + !> This subroutine computes the square root of the I-th updated + !> eigenvalue of a positive symmetric rank-one modification to + !> a positive diagonal matrix whose entries are given as the squares + !> of the corresponding entries in the array d, and that + !> 0 <= D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) * diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + interface lasd4 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd4( n, i, d, z, delta, rho, sigma, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: rho,d(*),z(*) + real(dp), intent(out) :: sigma,delta(*),work(*) + end subroutine dlasd4 +#else + module procedure stdlib_dlasd4 +#endif +#:if WITH_QP + module procedure stdlib_qlasd4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd4( n, i, d, z, delta, rho, sigma, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: rho,d(*),z(*) + real(sp), intent(out) :: sigma,delta(*),work(*) + end subroutine slasd4 +#else + module procedure stdlib_slasd4 +#endif + end interface lasd4 + + + + !> This subroutine computes the square root of the I-th eigenvalue + !> of a positive symmetric rank-one modification of a 2-by-2 diagonal + !> matrix + !> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal entries in the array D are assumed to satisfy + !> 0 <= D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + interface lasd5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd5( i, d, z, delta, rho, dsigma, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i + real(dp), intent(out) :: dsigma,delta(2),work(2) + real(dp), intent(in) :: rho,d(2),z(2) + end subroutine dlasd5 +#else + module procedure stdlib_dlasd5 +#endif +#:if WITH_QP + module procedure stdlib_qlasd5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd5( i, d, z, delta, rho, dsigma, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i + real(sp), intent(out) :: dsigma,delta(2),work(2) + real(sp), intent(in) :: rho,d(2),z(2) + end subroutine slasd5 +#else + module procedure stdlib_slasd5 +#endif + end interface lasd5 + + + + !> LASD6: computes the SVD of an updated upper bidiagonal matrix B + !> obtained by merging two smaller ones by appending a row. This + !> routine is used only for the problem which requires all singular + !> values and optionally singular vector matrices in factored form. + !> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + !> A related subroutine, DLASD1, handles the case in which all singular + !> values and singular vectors of the bidiagonal matrix are desired. + !> LASD6 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The singular values of B can be computed using D1, D2, the first + !> components of all the right singular vectors of the lower block, and + !> the last components of all the right singular vectors of the upper + !> block. These components are stored and updated in VF and VL, + !> respectively, in LASD6. Hence U and VT are not explicitly + !> referenced. + !> The singular values are stored in D. The algorithm consists of two + !> stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or if there is a zero + !> in the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD7. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the roots of the + !> secular equation via the routine DLASD4 (as called by DLASD8). + !> This routine also updates VF and VL and computes the distances + !> between the updated singular values and the old singular + !> values. + !> LASD6 is called from DLASDA. + interface lasd6 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & + givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: givptr,info,k,givcol(ldgcol,*),iwork(*),perm(*) + + integer(ilp), intent(in) :: icompq,ldgcol,ldgnum,nl,nr,sqre + real(dp), intent(inout) :: alpha,beta,d(*),vf(*),vl(*) + real(dp), intent(out) :: c,s,difl(*),difr(*),givnum(ldgnum,*),poles(ldgnum,*),& + work(*),z(*) + integer(ilp), intent(inout) :: idxq(*) + end subroutine dlasd6 +#else + module procedure stdlib_dlasd6 +#endif +#:if WITH_QP + module procedure stdlib_qlasd6 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & + givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: givptr,info,k,givcol(ldgcol,*),iwork(*),perm(*) + + integer(ilp), intent(in) :: icompq,ldgcol,ldgnum,nl,nr,sqre + real(sp), intent(inout) :: alpha,beta,d(*),vf(*),vl(*) + real(sp), intent(out) :: c,s,difl(*),difr(*),givnum(ldgnum,*),poles(ldgnum,*),& + work(*),z(*) + integer(ilp), intent(inout) :: idxq(*) + end subroutine slasd6 +#else + module procedure stdlib_slasd6 +#endif + end interface lasd6 + + + + !> LASD7: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. There + !> are two ways in which deflation can occur: when two or more singular + !> values are close together or if there is a tiny entry in the Z + !> vector. For each such occurrence the order of the related + !> secular equation problem is reduced by one. + !> LASD7 is called from DLASD6. + interface lasd7 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & + beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: givptr,info,k,givcol(ldgcol,*),idx(*),idxp(*),& + perm(*) + integer(ilp), intent(in) :: icompq,ldgcol,ldgnum,nl,nr,sqre + real(dp), intent(in) :: alpha,beta + real(dp), intent(out) :: c,s,dsigma(*),givnum(ldgnum,*),vfw(*),vlw(*),z(*),zw(& + *) + integer(ilp), intent(inout) :: idxq(*) + real(dp), intent(inout) :: d(*),vf(*),vl(*) + end subroutine dlasd7 +#else + module procedure stdlib_dlasd7 +#endif +#:if WITH_QP + module procedure stdlib_qlasd7 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & + beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: givptr,info,k,givcol(ldgcol,*),idx(*),idxp(*),& + perm(*) + integer(ilp), intent(in) :: icompq,ldgcol,ldgnum,nl,nr,sqre + real(sp), intent(in) :: alpha,beta + real(sp), intent(out) :: c,s,dsigma(*),givnum(ldgnum,*),vfw(*),vlw(*),z(*),zw(& + *) + integer(ilp), intent(inout) :: idxq(*) + real(sp), intent(inout) :: d(*),vf(*),vl(*) + end subroutine slasd7 +#else + module procedure stdlib_slasd7 +#endif + end interface lasd7 + + + + !> LASD8: finds the square roots of the roots of the secular equation, + !> as defined by the values in DSIGMA and Z. It makes the appropriate + !> calls to DLASD4, and stores, for each element in D, the distance + !> to its two nearest poles (elements in DSIGMA). It also updates + !> the arrays VF and VL, the first and last components of all the + !> right singular vectors of the original bidiagonal matrix. + !> LASD8 is called from DLASD6. + interface lasd8 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,k,lddifr + integer(ilp), intent(out) :: info + real(dp), intent(out) :: d(*),difl(*),difr(lddifr,*),work(*) + real(dp), intent(inout) :: dsigma(*),vf(*),vl(*),z(*) + end subroutine dlasd8 +#else + module procedure stdlib_dlasd8 +#endif +#:if WITH_QP + module procedure stdlib_qlasd8 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,k,lddifr + integer(ilp), intent(out) :: info + real(sp), intent(out) :: d(*),difl(*),difr(lddifr,*),work(*) + real(sp), intent(inout) :: dsigma(*),vf(*),vl(*),z(*) + end subroutine slasd8 +#else + module procedure stdlib_slasd8 +#endif + end interface lasd8 + + + + !> Using a divide and conquer approach, LASDA: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + !> B with diagonal D and offdiagonal E, where M = N + SQRE. The + !> algorithm computes the singular values in the SVD B = U * S * VT. + !> The orthogonal matrices U and VT are optionally computed in + !> compact form. + !> A related subroutine, DLASD0, computes the singular values and + !> the singular vectors in explicit form. + interface lasda +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z,& + poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldgcol,ldu,n,smlsiz,sqre + integer(ilp), intent(out) :: info,givcol(ldgcol,*),givptr(*),iwork(*),k(*),& + perm(ldgcol,*) + real(dp), intent(out) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),work(*),z(ldu,*) + real(dp), intent(inout) :: d(*),e(*) + end subroutine dlasda +#else + module procedure stdlib_dlasda +#endif +#:if WITH_QP + module procedure stdlib_qlasda +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z,& + poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: icompq,ldgcol,ldu,n,smlsiz,sqre + integer(ilp), intent(out) :: info,givcol(ldgcol,*),givptr(*),iwork(*),k(*),& + perm(ldgcol,*) + real(sp), intent(out) :: c(*),difl(ldu,*),difr(ldu,*),givnum(ldu,*),poles(ldu,& + *),s(*),u(ldu,*),vt(ldu,*),work(*),z(ldu,*) + real(sp), intent(inout) :: d(*),e(*) + end subroutine slasda +#else + module procedure stdlib_slasda +#endif + end interface lasda + + + + !> LASDQ: computes the singular value decomposition (SVD) of a real + !> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + !> E, accumulating the transformations if desired. Letting B denote + !> the input bidiagonal matrix, the algorithm computes orthogonal + !> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose + !> of P). The singular values S are overwritten on D. + !> The input matrix U is changed to U * Q if desired. + !> The input matrix VT is changed to P**T * VT if desired. + !> The input matrix C is changed to Q**T * C if desired. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3, for a detailed description of the algorithm. + interface lasdq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, & + ldc, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru,sqre + real(dp), intent(inout) :: c(ldc,*),d(*),e(*),u(ldu,*),vt(ldvt,*) + real(dp), intent(out) :: work(*) + end subroutine dlasdq +#else + module procedure stdlib_dlasdq +#endif +#:if WITH_QP + module procedure stdlib_qlasdq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, & + ldc, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,ldu,ldvt,n,ncc,ncvt,nru,sqre + real(sp), intent(inout) :: c(ldc,*),d(*),e(*),u(ldu,*),vt(ldvt,*) + real(sp), intent(out) :: work(*) + end subroutine slasdq +#else + module procedure stdlib_slasdq +#endif + end interface lasdq + + + + !> LASET: initializes a 2-D array A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + interface laset +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claset( uplo, m, n, alpha, beta, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(in) :: alpha,beta + complex(sp), intent(out) :: a(lda,*) + end subroutine claset +#else + module procedure stdlib_claset +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaset( uplo, m, n, alpha, beta, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: alpha,beta + real(dp), intent(out) :: a(lda,*) + end subroutine dlaset +#else + module procedure stdlib_dlaset +#endif +#:if WITH_QP + module procedure stdlib_qlaset +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaset( uplo, m, n, alpha, beta, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: alpha,beta + real(sp), intent(out) :: a(lda,*) + end subroutine slaset +#else + module procedure stdlib_slaset +#endif +#:if WITH_QP + module procedure stdlib_wlaset +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaset( uplo, m, n, alpha, beta, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(in) :: alpha,beta + complex(dp), intent(out) :: a(lda,*) + end subroutine zlaset +#else + module procedure stdlib_zlaset +#endif + end interface laset + + + + !> LASQ1: computes the singular values of a real N-by-N bidiagonal + !> matrix with diagonal D and off-diagonal E. The singular values + !> are computed to high relative accuracy, in the absence of + !> denormalization, underflow and overflow. The algorithm was first + !> presented in + !> "Accurate singular values and differential qd algorithms" by K. V. + !> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + !> 1994, + !> and the present implementation is described in "An implementation of + !> the dqds Algorithm (Positive Case)", LAPACK Working Note. + interface lasq1 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasq1( n, d, e, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: work(*) + end subroutine dlasq1 +#else + module procedure stdlib_dlasq1 +#endif +#:if WITH_QP + module procedure stdlib_qlasq1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasq1( n, d, e, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: work(*) + end subroutine slasq1 +#else + module procedure stdlib_slasq1 +#endif + end interface lasq1 + + + + !> LASQ4: computes an approximation TAU to the smallest eigenvalue + !> using values of d from the previous transform. + interface lasq4 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasq4( i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn,dn1, dn2, tau, & + ttype, g ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i0,n0,n0in,pp + integer(ilp), intent(out) :: ttype + real(dp), intent(in) :: dmin,dmin1,dmin2,dn,dn1,dn2,z(*) + real(dp), intent(inout) :: g + real(dp), intent(out) :: tau + end subroutine dlasq4 +#else + module procedure stdlib_dlasq4 +#endif +#:if WITH_QP + module procedure stdlib_qlasq4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasq4( i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn,dn1, dn2, tau, & + ttype, g ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i0,n0,n0in,pp + integer(ilp), intent(out) :: ttype + real(sp), intent(in) :: dmin,dmin1,dmin2,dn,dn1,dn2,z(*) + real(sp), intent(inout) :: g + real(sp), intent(out) :: tau + end subroutine slasq4 +#else + module procedure stdlib_slasq4 +#endif + end interface lasq4 + + + + !> LASQ5: computes one dqds transform in ping-pong form, one + !> version for IEEE machines another for non IEEE machines. + interface lasq5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2,dn, dnm1, & + dnm2, ieee, eps ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0,n0,pp + real(dp), intent(out) :: dmin,dmin1,dmin2,dn,dnm1,dnm2 + real(dp), intent(inout) :: tau,z(*) + real(dp), intent(in) :: sigma,eps + end subroutine dlasq5 +#else + module procedure stdlib_dlasq5 +#endif +#:if WITH_QP + module procedure stdlib_qlasq5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2,dn, dnm1, & + dnm2, ieee, eps ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0,n0,pp + real(sp), intent(out) :: dmin,dmin1,dmin2,dn,dnm1,dnm2 + real(sp), intent(inout) :: tau,z(*) + real(sp), intent(in) :: sigma,eps + end subroutine slasq5 +#else + module procedure stdlib_slasq5 +#endif + end interface lasq5 + + + + !> LASQ6: computes one dqd (shift equal to zero) transform in + !> ping-pong form, with protection against underflow and overflow. + interface lasq6 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasq6( i0, n0, z, pp, dmin, dmin1, dmin2, dn,dnm1, dnm2 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i0,n0,pp + real(dp), intent(out) :: dmin,dmin1,dmin2,dn,dnm1,dnm2 + real(dp), intent(inout) :: z(*) + end subroutine dlasq6 +#else + module procedure stdlib_dlasq6 +#endif +#:if WITH_QP + module procedure stdlib_qlasq6 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasq6( i0, n0, z, pp, dmin, dmin1, dmin2, dn,dnm1, dnm2 ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: i0,n0,pp + real(sp), intent(out) :: dmin,dmin1,dmin2,dn,dnm1,dnm2 + real(sp), intent(inout) :: z(*) + end subroutine slasq6 +#else + module procedure stdlib_slasq6 +#endif + end interface lasq6 + + + + !> LASR: applies a sequence of real plane rotations to a complex matrix + !> A, from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + interface lasr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clasr( side, pivot, direct, m, n, c, s, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,pivot,side + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(in) :: c(*),s(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine clasr +#else + module procedure stdlib_clasr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasr( side, pivot, direct, m, n, c, s, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,pivot,side + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: c(*),s(*) + end subroutine dlasr +#else + module procedure stdlib_dlasr +#endif +#:if WITH_QP + module procedure stdlib_qlasr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasr( side, pivot, direct, m, n, c, s, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,pivot,side + integer(ilp), intent(in) :: lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: c(*),s(*) + end subroutine slasr +#else + module procedure stdlib_slasr +#endif +#:if WITH_QP + module procedure stdlib_wlasr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlasr( side, pivot, direct, m, n, c, s, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,pivot,side + integer(ilp), intent(in) :: lda,m,n + real(dp), intent(in) :: c(*),s(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlasr +#else + module procedure stdlib_zlasr +#endif + end interface lasr + + + + !> Sort the numbers in D in increasing order (if ID = 'I') or + !> in decreasing order (if ID = 'D' ). + !> Use Quick Sort, reverting to Insertion sort on arrays of + !> size <= 20. Dimension of STACK limits N to about 2**32. + interface lasrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasrt( id, n, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: id + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*) + end subroutine dlasrt +#else + module procedure stdlib_dlasrt +#endif +#:if WITH_QP + module procedure stdlib_qlasrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasrt( id, n, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: id + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*) + end subroutine slasrt +#else + module procedure stdlib_slasrt +#endif + end interface lasrt + + + + !> ! + !> + !> LASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + interface lassq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine classq( n, x, incx, scl, sumsq ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(inout) :: scl,sumsq + complex(sp), intent(in) :: x(*) + end subroutine classq +#else + module procedure stdlib_classq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlassq( n, x, incx, scl, sumsq ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(inout) :: scl,sumsq + real(dp), intent(in) :: x(*) + end subroutine dlassq +#else + module procedure stdlib_dlassq +#endif +#:if WITH_QP + module procedure stdlib_qlassq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slassq( n, x, incx, scl, sumsq ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(inout) :: scl,sumsq + real(sp), intent(in) :: x(*) + end subroutine slassq +#else + module procedure stdlib_slassq +#endif +#:if WITH_QP + module procedure stdlib_wlassq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlassq( n, x, incx, scl, sumsq ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(inout) :: scl,sumsq + complex(dp), intent(in) :: x(*) + end subroutine zlassq +#else + module procedure stdlib_zlassq +#endif + end interface lassq + + + + !> LASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a complex M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + interface laswlq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,lwork,ldt + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*),t(ldt,*) + end subroutine claswlq +#else + module procedure stdlib_claswlq +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,lwork,ldt + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*),t(ldt,*) + end subroutine dlaswlq +#else + module procedure stdlib_dlaswlq +#endif +#:if WITH_QP + module procedure stdlib_qlaswlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,lwork,ldt + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*),t(ldt,*) + end subroutine slaswlq +#else + module procedure stdlib_slaswlq +#endif +#:if WITH_QP + module procedure stdlib_wlaswlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,lwork,ldt + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*),t(ldt,*) + end subroutine zlaswlq +#else + module procedure stdlib_zlaswlq +#endif + end interface laswlq + + + + !> LASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + interface laswp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claswp( n, a, lda, k1, k2, ipiv, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k1,k2,lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine claswp +#else + module procedure stdlib_claswp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlaswp( n, a, lda, k1, k2, ipiv, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k1,k2,lda,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + end subroutine dlaswp +#else + module procedure stdlib_dlaswp +#endif +#:if WITH_QP + module procedure stdlib_qlaswp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slaswp( n, a, lda, k1, k2, ipiv, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k1,k2,lda,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + end subroutine slaswp +#else + module procedure stdlib_slaswp +#endif +#:if WITH_QP + module procedure stdlib_wlaswp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaswp( n, a, lda, k1, k2, ipiv, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,k1,k2,lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlaswp +#else + module procedure stdlib_zlaswp +#endif + end interface laswp + + + + !> LASYF: computes a partial factorization of a complex symmetric matrix + !> A using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**T denotes the transpose of U. + !> LASYF is an auxiliary routine called by CSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + interface lasyf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + end subroutine clasyf +#else + module procedure stdlib_clasyf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(ldw,*) + end subroutine dlasyf +#else + module procedure stdlib_dlasyf +#endif +#:if WITH_QP + module procedure stdlib_qlasyf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(ldw,*) + end subroutine slasyf +#else + module procedure stdlib_slasyf +#endif +#:if WITH_QP + module procedure stdlib_wlasyf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + end subroutine zlasyf +#else + module procedure stdlib_zlasyf +#endif + end interface lasyf + + + + !> DLATRF_AA factorizes a panel of a complex symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + interface lasyf_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*),h(ldh,*) + complex(sp), intent(out) :: work(*) + end subroutine clasyf_aa +#else + module procedure stdlib_clasyf_aa +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*),h(ldh,*) + real(dp), intent(out) :: work(*) + end subroutine dlasyf_aa +#else + module procedure stdlib_dlasyf_aa +#endif +#:if WITH_QP + module procedure stdlib_qlasyf_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*),h(ldh,*) + real(sp), intent(out) :: work(*) + end subroutine slasyf_aa +#else + module procedure stdlib_slasyf_aa +#endif +#:if WITH_QP + module procedure stdlib_wlasyf_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: m,nb,j1,lda,ldh + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*),h(ldh,*) + complex(dp), intent(out) :: work(*) + end subroutine zlasyf_aa +#else + module procedure stdlib_zlasyf_aa +#endif + end interface lasyf_aa + + + + !> LASYF_RK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> LASYF_RK is an auxiliary routine called by CSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + interface lasyf_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*),w(ldw,*) + end subroutine clasyf_rk +#else + module procedure stdlib_clasyf_rk +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*),w(ldw,*) + end subroutine dlasyf_rk +#else + module procedure stdlib_dlasyf_rk +#endif +#:if WITH_QP + module procedure stdlib_qlasyf_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*),w(ldw,*) + end subroutine slasyf_rk +#else + module procedure stdlib_slasyf_rk +#endif +#:if WITH_QP + module procedure stdlib_wlasyf_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*),w(ldw,*) + end subroutine zlasyf_rk +#else + module procedure stdlib_zlasyf_rk +#endif + end interface lasyf_rk + + + + !> LASYF_ROOK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> LASYF_ROOK is an auxiliary routine called by CSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + interface lasyf_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + end subroutine clasyf_rook +#else + module procedure stdlib_clasyf_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(ldw,*) + end subroutine dlasyf_rook +#else + module procedure stdlib_dlasyf_rook +#endif +#:if WITH_QP + module procedure stdlib_qlasyf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(ldw,*) + end subroutine slasyf_rook +#else + module procedure stdlib_slasyf_rook +#endif +#:if WITH_QP + module procedure stdlib_wlasyf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,kb,ipiv(*) + integer(ilp), intent(in) :: lda,ldw,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + end subroutine zlasyf_rook +#else + module procedure stdlib_zlasyf_rook +#endif + end interface lasyf_rook + + + + !> LATBS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine CTBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + interface latbs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: scale + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: x(*) + end subroutine clatbs +#else + module procedure stdlib_clatbs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: scale + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: cnorm(*),x(*) + end subroutine dlatbs +#else + module procedure stdlib_dlatbs +#endif +#:if WITH_QP + module procedure stdlib_qlatbs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: scale + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: cnorm(*),x(*) + end subroutine slatbs +#else + module procedure stdlib_slatbs +#endif +#:if WITH_QP + module procedure stdlib_wlatbs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: scale + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: x(*) + end subroutine zlatbs +#else + module procedure stdlib_zlatbs +#endif + end interface latbs + + + + !> LATDF: computes the contribution to the reciprocal Dif-estimate + !> by solving for x in Z * x = b, where b is chosen such that the norm + !> of x is as large as possible. It is assumed that LU decomposition + !> of Z has been computed by CGETC2. On entry RHS = f holds the + !> contribution from earlier solved sub-systems, and on return RHS = x. + !> The factorization of Z returned by CGETC2 has the form + !> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower + !> triangular with unit diagonal elements and U is upper triangular. + interface latdf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,ldz,n,ipiv(*),jpiv(*) + real(sp), intent(inout) :: rdscal,rdsum + complex(sp), intent(inout) :: rhs(*),z(ldz,*) + end subroutine clatdf +#else + module procedure stdlib_clatdf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,ldz,n,ipiv(*),jpiv(*) + real(dp), intent(inout) :: rdscal,rdsum,rhs(*),z(ldz,*) + end subroutine dlatdf +#else + module procedure stdlib_dlatdf +#endif +#:if WITH_QP + module procedure stdlib_qlatdf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,ldz,n,ipiv(*),jpiv(*) + real(sp), intent(inout) :: rdscal,rdsum,rhs(*),z(ldz,*) + end subroutine slatdf +#else + module procedure stdlib_slatdf +#endif +#:if WITH_QP + module procedure stdlib_wlatdf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ijob,ldz,n,ipiv(*),jpiv(*) + real(dp), intent(inout) :: rdscal,rdsum + complex(dp), intent(inout) :: rhs(*),z(ldz,*) + end subroutine zlatdf +#else + module procedure stdlib_zlatdf +#endif + end interface latdf + + + + !> LATPS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, A**H denotes the conjugate transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + interface latps +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: scale + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + end subroutine clatps +#else + module procedure stdlib_clatps +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: scale + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: cnorm(*),x(*) + end subroutine dlatps +#else + module procedure stdlib_dlatps +#endif +#:if WITH_QP + module procedure stdlib_qlatps +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: scale + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: cnorm(*),x(*) + end subroutine slatps +#else + module procedure stdlib_slatps +#endif +#:if WITH_QP + module procedure stdlib_wlatps +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: scale + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + end subroutine zlatps +#else + module procedure stdlib_zlatps +#endif + end interface latps + + + + !> LATRD: reduces NB rows and columns of a complex Hermitian matrix A to + !> Hermitian tridiagonal form by a unitary similarity + !> transformation Q**H * A * Q, and returns the matrices V and W which are + !> needed to apply the transformation to the unreduced part of A. + !> If UPLO = 'U', LATRD reduces the last NB rows and columns of a + !> matrix, of which the upper triangle is supplied; + !> if UPLO = 'L', LATRD reduces the first NB rows and columns of a + !> matrix, of which the lower triangle is supplied. + !> This is an auxiliary routine called by CHETRD. + interface latrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldw,n,nb + real(sp), intent(out) :: e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),w(ldw,*) + end subroutine clatrd +#else + module procedure stdlib_clatrd +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldw,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*),tau(*),w(ldw,*) + end subroutine dlatrd +#else + module procedure stdlib_dlatrd +#endif +#:if WITH_QP + module procedure stdlib_qlatrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldw,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*),tau(*),w(ldw,*) + end subroutine slatrd +#else + module procedure stdlib_slatrd +#endif +#:if WITH_QP + module procedure stdlib_wlatrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda,ldw,n,nb + real(dp), intent(out) :: e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),w(ldw,*) + end subroutine zlatrd +#else + module procedure stdlib_zlatrd +#endif + end interface latrd + + + + !> LATRS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, A**H denotes the + !> conjugate transpose of A, x and b are n-element vectors, and s is a + !> scaling factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + interface latrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: scale + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + end subroutine clatrs +#else + module procedure stdlib_clatrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: scale + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: cnorm(*),x(*) + end subroutine dlatrs +#else + module procedure stdlib_dlatrs +#endif +#:if WITH_QP + module procedure stdlib_qlatrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: scale + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: cnorm(*),x(*) + end subroutine slatrs +#else + module procedure stdlib_slatrs +#endif +#:if WITH_QP + module procedure stdlib_wlatrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,normin,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: scale + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + end subroutine zlatrs +#else + module procedure stdlib_zlatrs +#endif + end interface latrs + + + + !> LATRZ: factors the M-by-(M+L) complex upper trapezoidal matrix + !> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means + !> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary + !> matrix and, R and A1 are M-by-M upper triangular matrices. + interface latrz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatrz( m, n, l, a, lda, tau, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: l,lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine clatrz +#else + module procedure stdlib_clatrz +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatrz( m, n, l, a, lda, tau, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: l,lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dlatrz +#else + module procedure stdlib_dlatrz +#endif +#:if WITH_QP + module procedure stdlib_qlatrz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatrz( m, n, l, a, lda, tau, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: l,lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine slatrz +#else + module procedure stdlib_slatrz +#endif +#:if WITH_QP + module procedure stdlib_wlatrz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatrz( m, n, l, a, lda, tau, work ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: l,lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine zlatrz +#else + module procedure stdlib_zlatrz +#endif + end interface latrz + + + + !> LATSQR: computes a blocked Tall-Skinny QR factorization of + !> a complex M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + interface latsqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,ldt,lwork + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*),t(ldt,*) + end subroutine clatsqr +#else + module procedure stdlib_clatsqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,ldt,lwork + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*),t(ldt,*) + end subroutine dlatsqr +#else + module procedure stdlib_dlatsqr +#endif +#:if WITH_QP + module procedure stdlib_qlatsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,ldt,lwork + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*),t(ldt,*) + end subroutine slatsqr +#else + module procedure stdlib_slatsqr +#endif +#:if WITH_QP + module procedure stdlib_wlatsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n,mb,nb,ldt,lwork + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*),t(ldt,*) + end subroutine zlatsqr +#else + module procedure stdlib_zlatsqr +#endif + end interface latsqr + + + + !> LAUNHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine LAUNHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + interface launhr_col_getrfnp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine claunhr_col_getrfnp( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*) + end subroutine claunhr_col_getrfnp +#else + module procedure stdlib_claunhr_col_getrfnp +#endif +#:if WITH_QP + module procedure stdlib_wlaunhr_col_getrfnp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlaunhr_col_getrfnp( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*) + end subroutine zlaunhr_col_getrfnp +#else + module procedure stdlib_zlaunhr_col_getrfnp +#endif + end interface launhr_col_getrfnp + + + + !> LAUNHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> LAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine CLAUNHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, LAUNHR_COL_GETRFNP2 + !> is self-sufficient and can be used without CLAUNHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + interface launhr_col_getrfnp2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine claunhr_col_getrfnp2( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*) + end subroutine claunhr_col_getrfnp2 +#else + module procedure stdlib_claunhr_col_getrfnp2 +#endif +#:if WITH_QP + module procedure stdlib_wlaunhr_col_getrfnp2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine zlaunhr_col_getrfnp2( m, n, a, lda, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*) + end subroutine zlaunhr_col_getrfnp2 +#else + module procedure stdlib_zlaunhr_col_getrfnp2 +#endif + end interface launhr_col_getrfnp2 + + + + !> LAUUM: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + interface lauum +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine clauum( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine clauum +#else + module procedure stdlib_clauum +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dlauum( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dlauum +#else + module procedure stdlib_dlauum +#endif +#:if WITH_QP + module procedure stdlib_qlauum +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine slauum( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine slauum +#else + module procedure stdlib_slauum +#endif +#:if WITH_QP + module procedure stdlib_wlauum +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zlauum( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zlauum +#else + module procedure stdlib_zlauum +#endif + end interface lauum + + + + !> OPGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> DSPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + interface opgtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dopgtr( uplo, n, ap, tau, q, ldq, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,n + real(dp), intent(in) :: ap(*),tau(*) + real(dp), intent(out) :: q(ldq,*),work(*) + end subroutine dopgtr +#else + module procedure stdlib_dopgtr +#endif +#:if WITH_QP + module procedure stdlib_qopgtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sopgtr( uplo, n, ap, tau, q, ldq, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,n + real(sp), intent(in) :: ap(*),tau(*) + real(sp), intent(out) :: q(ldq,*),work(*) + end subroutine sopgtr +#else + module procedure stdlib_sopgtr +#endif + end interface opgtr + + + + !> OPMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by DSPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + interface opmtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dopmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,m,n + real(dp), intent(inout) :: ap(*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dopmtr +#else + module procedure stdlib_dopmtr +#endif +#:if WITH_QP + module procedure stdlib_qopmtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sopmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,m,n + real(sp), intent(inout) :: ap(*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sopmtr +#else + module procedure stdlib_sopmtr +#endif + end interface opmtr + + + + !> ORBDB: simultaneously bidiagonalizes the blocks of an M-by-M + !> partitioned orthogonal matrix X: + !> [ B11 | B12 0 0 ] + !> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**T + !> X = [-----------] = [---------] [----------------] [---------] . + !> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] + !> [ 0 | 0 0 I ] + !> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is + !> not the case, then X must be transposed and/or permuted. This can be + !> done in constant time using the TRANS and SIGNS options. See DORCSD + !> for details.) + !> The orthogonal matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- + !> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are + !> represented implicitly by Householder vectors. + !> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface orbdb +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: signs,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11,ldx12,ldx21,ldx22,lwork,m,p,q + real(dp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),tauq2(*),& + work(*) + real(dp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(ldx22,*) + + end subroutine dorbdb +#else + module procedure stdlib_dorbdb +#endif +#:if WITH_QP + module procedure stdlib_qorbdb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: signs,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11,ldx12,ldx21,ldx22,lwork,m,p,q + real(sp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),tauq2(*),& + work(*) + real(sp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(ldx22,*) + + end subroutine sorbdb +#else + module procedure stdlib_sorbdb +#endif + end interface orbdb + + + + !> ORBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines DORBDB2, DORBDB3, and DORBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + interface orbdb1 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine dorbdb1 +#else + module procedure stdlib_dorbdb1 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine sorbdb1 +#else + module procedure stdlib_sorbdb1 +#endif + end interface orbdb1 + + + + !> ORBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines DORBDB1, DORBDB3, and DORBDB4 handle cases in + !> which P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + interface orbdb2 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine dorbdb2 +#else + module procedure stdlib_dorbdb2 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine sorbdb2 +#else + module procedure stdlib_sorbdb2 +#endif + end interface orbdb2 + + + + !> ORBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines DORBDB1, DORBDB2, and DORBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface orbdb3 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine dorbdb3 +#else + module procedure stdlib_dorbdb3 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*),taup1(*),taup2(*),tauq1(*),work(*) + + real(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine sorbdb3 +#else + module procedure stdlib_sorbdb3 +#endif + end interface orbdb3 + + + + !> ORBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines DORBDB1, DORBDB2, and DORBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface orbdb4 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, phantom, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*),phantom(*),taup1(*),taup2(*),tauq1(*)& + ,work(*) + real(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine dorbdb4 +#else + module procedure stdlib_dorbdb4 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, phantom, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*),phantom(*),taup1(*),taup2(*),tauq1(*)& + ,work(*) + real(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine sorbdb4 +#else + module procedure stdlib_sorbdb4 +#endif + end interface orbdb4 + + + + !> ORBDB5: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then some other vector from the orthogonal complement + !> is returned. This vector is chosen in an arbitrary but deterministic + !> way. + interface orbdb5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: x1(*),x2(*) + end subroutine dorbdb5 +#else + module procedure stdlib_dorbdb5 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: x1(*),x2(*) + end subroutine sorbdb5 +#else + module procedure stdlib_sorbdb5 +#endif + end interface orbdb5 + + + + !> ORBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + interface orbdb6 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: x1(*),x2(*) + end subroutine dorbdb6 +#else + module procedure stdlib_dorbdb6 +#endif +#:if WITH_QP + module procedure stdlib_qorbdb6 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: x1(*),x2(*) + end subroutine sorbdb6 +#else + module procedure stdlib_sorbdb6 +#endif + end interface orbdb6 + + + + !> ORCSD: computes the CS decomposition of an M-by-M partitioned + !> orthogonal matrix X: + !> [ I 0 0 | 0 0 0 ] + !> [ 0 C 0 | 0 -S 0 ] + !> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**T + !> X = [-----------] = [---------] [---------------------] [---------] . + !> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] + !> [ 0 S 0 | 0 C 0 ] + !> [ 0 0 I | 0 0 0 ] + !> X11 is P-by-Q. The orthogonal matrices U1, U2, V1, and V2 are P-by-P, + !> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are + !> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in + !> which R = MIN(P,M-P,Q,M-Q). + interface orcsd +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine dorcsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, & + x11, ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, & + ldv1t, v2t,ldv2t, work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,signs,trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,ldx11,ldx12,ldx21,ldx22,& + lwork,m,p,q + real(dp), intent(out) :: theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(& + ldv2t,*),work(*) + real(dp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(ldx22,*) + + end subroutine dorcsd +#else + module procedure stdlib_dorcsd +#endif +#:if WITH_QP + module procedure stdlib_qorcsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine sorcsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, & + x11, ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, & + ldv1t, v2t,ldv2t, work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,signs,trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,ldx11,ldx12,ldx21,ldx22,& + lwork,m,p,q + real(sp), intent(out) :: theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(& + ldv2t,*),work(*) + real(sp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(ldx22,*) + + end subroutine sorcsd +#else + module procedure stdlib_sorcsd +#endif + end interface orcsd + + + + !> ORCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The orthogonal matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + interface orcsd2by1 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dorcsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta,& + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,lwork,ldx11,ldx21,m,p,q + real(dp), intent(out) :: theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),work(*) + + real(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine dorcsd2by1 +#else + module procedure stdlib_dorcsd2by1 +#endif +#:if WITH_QP + module procedure stdlib_qorcsd2by1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sorcsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta,& + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,lwork,ldx11,ldx21,m,p,q + real(sp), intent(out) :: theta(*),u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),work(*) + + real(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine sorcsd2by1 +#else + module procedure stdlib_sorcsd2by1 +#endif + end interface orcsd2by1 + + + + !> ORG2L: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. + interface org2l +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorg2l( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorg2l +#else + module procedure stdlib_dorg2l +#endif +#:if WITH_QP + module procedure stdlib_qorg2l +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorg2l( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorg2l +#else + module procedure stdlib_sorg2l +#endif + end interface org2l + + + + !> ORG2R: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. + interface org2r +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorg2r( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorg2r +#else + module procedure stdlib_dorg2r +#endif +#:if WITH_QP + module procedure stdlib_qorg2r +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorg2r( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorg2r +#else + module procedure stdlib_sorg2r +#endif + end interface org2r + + + + !> ORGBR: generates one of the real orthogonal matrices Q or P**T + !> determined by DGEBRD when reducing a real matrix A to bidiagonal + !> form: A = Q * B * P**T. Q and P**T are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and ORGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and ORGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + !> is of order N: + !> if k < n, P**T = G(k) . . . G(2) G(1) and ORGBR returns the first m + !> rows of P**T, where n >= m >= k; + !> if k >= n, P**T = G(n-1) . . . G(2) G(1) and ORGBR returns P**T as + !> an N-by-N matrix. + interface orgbr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorgbr +#else + module procedure stdlib_dorgbr +#endif +#:if WITH_QP + module procedure stdlib_qorgbr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorgbr +#else + module procedure stdlib_sorgbr +#endif + end interface orgbr + + + + !> ORGHR: generates a real orthogonal matrix Q which is defined as the + !> product of IHI-ILO elementary reflectors of order N, as returned by + !> DGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + interface orghr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorghr +#else + module procedure stdlib_dorghr +#endif +#:if WITH_QP + module procedure stdlib_qorghr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorghr +#else + module procedure stdlib_sorghr +#endif + end interface orghr + + + + !> ORGLQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. + interface orglq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorglq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorglq +#else + module procedure stdlib_dorglq +#endif +#:if WITH_QP + module procedure stdlib_qorglq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorglq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorglq +#else + module procedure stdlib_sorglq +#endif + end interface orglq + + + + !> ORGQL: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. + interface orgql +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgql( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorgql +#else + module procedure stdlib_dorgql +#endif +#:if WITH_QP + module procedure stdlib_qorgql +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgql( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorgql +#else + module procedure stdlib_sorgql +#endif + end interface orgql + + + + !> ORGQR: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. + interface orgqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgqr( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorgqr +#else + module procedure stdlib_dorgqr +#endif +#:if WITH_QP + module procedure stdlib_qorgqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgqr( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorgqr +#else + module procedure stdlib_sorgqr +#endif + end interface orgqr + + + + !> ORGRQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. + interface orgrq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgrq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorgrq +#else + module procedure stdlib_dorgrq +#endif +#:if WITH_QP + module procedure stdlib_qorgrq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgrq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorgrq +#else + module procedure stdlib_sorgrq +#endif + end interface orgrq + + + + !> ORGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> DSYTRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + interface orgtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgtr( uplo, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorgtr +#else + module procedure stdlib_dorgtr +#endif +#:if WITH_QP + module procedure stdlib_qorgtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgtr( uplo, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorgtr +#else + module procedure stdlib_sorgtr +#endif + end interface orgtr + + + + !> ORGTSQR: generates an M-by-N real matrix Q_out with orthonormal columns, + !> which are the first N columns of a product of real orthogonal + !> matrices of order M which are returned by DLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for DLATSQR. + interface orgtsqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(*) + end subroutine dorgtsqr +#else + module procedure stdlib_dorgtsqr +#endif +#:if WITH_QP + module procedure stdlib_qorgtsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(*) + end subroutine sorgtsqr +#else + module procedure stdlib_sorgtsqr +#endif + end interface orgtsqr + + + + !> ORGTSQR_ROW: generates an M-by-N real matrix Q_out with + !> orthonormal columns from the output of DLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by DLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of DLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine DLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which DLATSQR generates the output blocks. + interface orgtsqr_row +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(*) + end subroutine dorgtsqr_row +#else + module procedure stdlib_dorgtsqr_row +#endif +#:if WITH_QP + module procedure stdlib_qorgtsqr_row +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(*) + end subroutine sorgtsqr_row +#else + module procedure stdlib_sorgtsqr_row +#endif + end interface orgtsqr_row + + + + !> ORHR_COL: takes an M-by-N real matrix Q_in with orthonormal columns + !> as input, stored in A, and performs Householder Reconstruction (HR), + !> i.e. reconstructs Householder vectors V(i) implicitly representing + !> another M-by-N matrix Q_out, with the property that Q_in = Q_out*S, + !> where S is an N-by-N diagonal matrix with diagonal entries + !> equal to +1 or -1. The Householder vectors (columns V(i) of V) are + !> stored in A on output, and the diagonal entries of S are stored in D. + !> Block reflectors are also returned in T + !> (same output format as DGEQRT). + interface orhr_col +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorhr_col( m, n, nb, a, lda, t, ldt, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*),t(ldt,*) + end subroutine dorhr_col +#else + module procedure stdlib_dorhr_col +#endif +#:if WITH_QP + module procedure stdlib_qorhr_col +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorhr_col( m, n, nb, a, lda, t, ldt, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*),t(ldt,*) + end subroutine sorhr_col +#else + module procedure stdlib_sorhr_col +#endif + end interface orhr_col + + + + !> ORM2L: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T * C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + interface orm2l +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorm2l +#else + module procedure stdlib_dorm2l +#endif +#:if WITH_QP + module procedure stdlib_qorm2l +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorm2l +#else + module procedure stdlib_sorm2l +#endif + end interface orm2l + + + + !> ORM2R: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + interface orm2r +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dorm2r +#else + module procedure stdlib_dorm2r +#endif +#:if WITH_QP + module procedure stdlib_qorm2r +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sorm2r +#else + module procedure stdlib_sorm2r +#endif + end interface orm2r + + + + !> If VECT = 'Q', ORMBR: overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> If VECT = 'P', ORMBR overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'T': P**T * C C * P**T + !> Here Q and P**T are the orthogonal matrices determined by DGEBRD when + !> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + !> P**T are defined as products of elementary reflectors H(i) and G(i) + !> respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the orthogonal matrix Q or P**T that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + interface ormbr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormbr +#else + module procedure stdlib_dormbr +#endif +#:if WITH_QP + module procedure stdlib_qormbr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormbr +#else + module procedure stdlib_sormbr +#endif + end interface ormbr + + + + !> ORMHR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> IHI-ILO elementary reflectors, as returned by DGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + interface ormhr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(in) :: ihi,ilo,lda,ldc,lwork,m,n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormhr +#else + module procedure stdlib_dormhr +#endif +#:if WITH_QP + module procedure stdlib_qormhr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(in) :: ihi,ilo,lda,ldc,lwork,m,n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormhr +#else + module procedure stdlib_sormhr +#endif + end interface ormhr + + + + !> ORMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface ormlq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormlq +#else + module procedure stdlib_dormlq +#endif +#:if WITH_QP + module procedure stdlib_qormlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormlq +#else + module procedure stdlib_sormlq +#endif + end interface ormlq + + + + !> ORMQL: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface ormql +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormql +#else + module procedure stdlib_dormql +#endif +#:if WITH_QP + module procedure stdlib_qormql +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormql +#else + module procedure stdlib_sormql +#endif + end interface ormql + + + + !> ORMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface ormqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormqr +#else + module procedure stdlib_dormqr +#endif +#:if WITH_QP + module procedure stdlib_qormqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormqr +#else + module procedure stdlib_sormqr +#endif + end interface ormqr + + + + !> ORMRQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface ormrq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormrq +#else + module procedure stdlib_dormrq +#endif +#:if WITH_QP + module procedure stdlib_qormrq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormrq +#else + module procedure stdlib_sormrq +#endif + end interface ormrq + + + + !> ORMRZ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface ormrz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,l,lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormrz +#else + module procedure stdlib_dormrz +#endif +#:if WITH_QP + module procedure stdlib_qormrz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,l,lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormrz +#else + module procedure stdlib_sormrz +#endif + end interface ormrz + + + + !> ORMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by DSYTRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + interface ormtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dormtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldc,lwork,m,n + real(dp), intent(inout) :: a(lda,*),c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + end subroutine dormtr +#else + module procedure stdlib_dormtr +#endif +#:if WITH_QP + module procedure stdlib_qormtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sormtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldc,lwork,m,n + real(sp), intent(inout) :: a(lda,*),c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + end subroutine sormtr +#else + module procedure stdlib_sormtr +#endif + end interface ormtr + + + + !> PBCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite band matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> CPBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface pbcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + end subroutine cpbcon +#else + module procedure stdlib_cpbcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(in) :: anorm,ab(ldab,*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dpbcon +#else + module procedure stdlib_dpbcon +#endif +#:if WITH_QP + module procedure stdlib_qpbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(in) :: anorm,ab(ldab,*) + real(sp), intent(out) :: rcond,work(*) + end subroutine spbcon +#else + module procedure stdlib_spbcon +#endif +#:if WITH_QP + module procedure stdlib_wpbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + end subroutine zpbcon +#else + module procedure stdlib_zpbcon +#endif + end interface pbcon + + + + !> PBEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + interface pbequ +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: amax,scond,s(*) + complex(sp), intent(in) :: ab(ldab,*) + end subroutine cpbequ +#else + module procedure stdlib_cpbequ +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: amax,scond,s(*) + real(dp), intent(in) :: ab(ldab,*) + end subroutine dpbequ +#else + module procedure stdlib_dpbequ +#endif +#:if WITH_QP + module procedure stdlib_qpbequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: amax,scond,s(*) + real(sp), intent(in) :: ab(ldab,*) + end subroutine spbequ +#else + module procedure stdlib_spbequ +#endif +#:if WITH_QP + module procedure stdlib_wpbequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: amax,scond,s(*) + complex(dp), intent(in) :: ab(ldab,*) + end subroutine zpbequ +#else + module procedure stdlib_zpbequ +#endif + end interface pbequ + + + + !> PBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and banded, and provides error bounds and backward error estimates + !> for the solution. + interface pbrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldafb,ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cpbrfs +#else + module procedure stdlib_cpbrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldafb,ldb,ldx,n,nrhs + real(dp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dpbrfs +#else + module procedure stdlib_dpbrfs +#endif +#:if WITH_QP + module procedure stdlib_qpbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldafb,ldb,ldx,n,nrhs + real(sp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine spbrfs +#else + module procedure stdlib_spbrfs +#endif +#:if WITH_QP + module procedure stdlib_wpbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldafb,ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: ab(ldab,*),afb(ldafb,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zpbrfs +#else + module procedure stdlib_zpbrfs +#endif + end interface pbrfs + + + + !> PBSTF: computes a split Cholesky factorization of a complex + !> Hermitian positive definite band matrix A. + !> This routine is designed to be used in conjunction with CHBGST. + !> The factorization has the form A = S**H*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + interface pbstf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbstf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine cpbstf +#else + module procedure stdlib_cpbstf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbstf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dpbstf +#else + module procedure stdlib_dpbstf +#endif +#:if WITH_QP + module procedure stdlib_qpbstf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbstf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(inout) :: ab(ldab,*) + end subroutine spbstf +#else + module procedure stdlib_spbstf +#endif +#:if WITH_QP + module procedure stdlib_wpbstf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbstf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zpbstf +#else + module procedure stdlib_zpbstf +#endif + end interface pbstf + + + + !> PBSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + interface pbsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(sp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine cpbsv +#else + module procedure stdlib_cpbsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(dp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine dpbsv +#else + module procedure stdlib_dpbsv +#endif +#:if WITH_QP + module procedure stdlib_qpbsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(sp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine spbsv +#else + module procedure stdlib_spbsv +#endif +#:if WITH_QP + module procedure stdlib_wpbsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(dp), intent(inout) :: ab(ldab,*),b(ldb,*) + end subroutine zpbsv +#else + module procedure stdlib_zpbsv +#endif + end interface pbsv + + + + !> PBTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + interface pbtrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbtrf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + complex(sp), intent(inout) :: ab(ldab,*) + end subroutine cpbtrf +#else + module procedure stdlib_cpbtrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbtrf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dpbtrf +#else + module procedure stdlib_dpbtrf +#endif +#:if WITH_QP + module procedure stdlib_qpbtrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbtrf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(inout) :: ab(ldab,*) + end subroutine spbtrf +#else + module procedure stdlib_spbtrf +#endif +#:if WITH_QP + module procedure stdlib_wpbtrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbtrf( uplo, n, kd, ab, ldab, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + complex(dp), intent(inout) :: ab(ldab,*) + end subroutine zpbtrf +#else + module procedure stdlib_zpbtrf +#endif + end interface pbtrf + + + + !> PBTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite band matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPBTRF. + interface pbtrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cpbtrs +#else + module procedure stdlib_cpbtrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dpbtrs +#else + module procedure stdlib_dpbtrs +#endif +#:if WITH_QP + module procedure stdlib_qpbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine spbtrs +#else + module procedure stdlib_spbtrs +#endif +#:if WITH_QP + module procedure stdlib_wpbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zpbtrs +#else + module procedure stdlib_zpbtrs +#endif + end interface pbtrs + + + + !> PFTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + interface pftrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpftrf( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(0:*) + end subroutine cpftrf +#else + module procedure stdlib_cpftrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpftrf( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: a(0:*) + end subroutine dpftrf +#else + module procedure stdlib_dpftrf +#endif +#:if WITH_QP + module procedure stdlib_qpftrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spftrf( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: a(0:*) + end subroutine spftrf +#else + module procedure stdlib_spftrf +#endif +#:if WITH_QP + module procedure stdlib_wpftrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpftrf( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(0:*) + end subroutine zpftrf +#else + module procedure stdlib_zpftrf +#endif + end interface pftrf + + + + !> PFTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPFTRF. + interface pftri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpftri( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: a(0:*) + end subroutine cpftri +#else + module procedure stdlib_cpftri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpftri( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: a(0:*) + end subroutine dpftri +#else + module procedure stdlib_dpftri +#endif +#:if WITH_QP + module procedure stdlib_qpftri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spftri( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: a(0:*) + end subroutine spftri +#else + module procedure stdlib_spftri +#endif +#:if WITH_QP + module procedure stdlib_wpftri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpftri( transr, uplo, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: a(0:*) + end subroutine zpftri +#else + module procedure stdlib_zpftri +#endif + end interface pftri + + + + !> PFTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPFTRF. + interface pftrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(in) :: a(0:*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cpftrs +#else + module procedure stdlib_cpftrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(in) :: a(0:*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dpftrs +#else + module procedure stdlib_dpftrs +#endif +#:if WITH_QP + module procedure stdlib_qpftrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(in) :: a(0:*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine spftrs +#else + module procedure stdlib_spftrs +#endif +#:if WITH_QP + module procedure stdlib_wpftrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(in) :: a(0:*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zpftrs +#else + module procedure stdlib_zpftrs +#endif + end interface pftrs + + + + !> POCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite matrix using the + !> Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface pocon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpocon( uplo, n, a, lda, anorm, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine cpocon +#else + module procedure stdlib_cpocon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpocon( uplo, n, a, lda, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,work(*) + real(dp), intent(inout) :: a(lda,*) + end subroutine dpocon +#else + module procedure stdlib_dpocon +#endif +#:if WITH_QP + module procedure stdlib_qpocon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spocon( uplo, n, a, lda, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,work(*) + real(sp), intent(inout) :: a(lda,*) + end subroutine spocon +#else + module procedure stdlib_spocon +#endif +#:if WITH_QP + module procedure stdlib_wpocon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpocon( uplo, n, a, lda, anorm, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zpocon +#else + module procedure stdlib_zpocon +#endif + end interface pocon + + + + !> POEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + interface poequ +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpoequ( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + complex(sp), intent(in) :: a(lda,*) + end subroutine cpoequ +#else + module procedure stdlib_cpoequ +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpoequ( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + real(dp), intent(in) :: a(lda,*) + end subroutine dpoequ +#else + module procedure stdlib_dpoequ +#endif +#:if WITH_QP + module procedure stdlib_qpoequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spoequ( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + real(sp), intent(in) :: a(lda,*) + end subroutine spoequ +#else + module procedure stdlib_spoequ +#endif +#:if WITH_QP + module procedure stdlib_wpoequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpoequ( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + complex(dp), intent(in) :: a(lda,*) + end subroutine zpoequ +#else + module procedure stdlib_zpoequ +#endif + end interface poequ + + + + !> POEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from CPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + interface poequb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpoequb( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + complex(sp), intent(in) :: a(lda,*) + end subroutine cpoequb +#else + module procedure stdlib_cpoequb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpoequb( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + real(dp), intent(in) :: a(lda,*) + end subroutine dpoequb +#else + module procedure stdlib_dpoequb +#endif +#:if WITH_QP + module procedure stdlib_qpoequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spoequb( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + real(sp), intent(in) :: a(lda,*) + end subroutine spoequb +#else + module procedure stdlib_spoequb +#endif +#:if WITH_QP + module procedure stdlib_wpoequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpoequb( n, a, lda, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + complex(dp), intent(in) :: a(lda,*) + end subroutine zpoequb +#else + module procedure stdlib_zpoequb +#endif + end interface poequb + + + + !> PORFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite, + !> and provides error bounds and backward error estimates for the + !> solution. + interface porfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cporfs +#else + module procedure stdlib_cporfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs + real(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dporfs +#else + module procedure stdlib_dporfs +#endif +#:if WITH_QP + module procedure stdlib_qporfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs + real(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine sporfs +#else + module procedure stdlib_sporfs +#endif +#:if WITH_QP + module procedure stdlib_wporfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zporfs +#else + module procedure stdlib_zporfs +#endif + end interface porfs + + + + !> POSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H* U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + interface posv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cposv( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine cposv +#else + module procedure stdlib_cposv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dposv( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine dposv +#else + module procedure stdlib_dposv +#endif +#:if WITH_QP + module procedure stdlib_qposv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sposv( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine sposv +#else + module procedure stdlib_sposv +#endif +#:if WITH_QP + module procedure stdlib_wposv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zposv( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + end subroutine zposv +#else + module procedure stdlib_zposv +#endif + end interface posv + + + + !> POTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + interface potrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpotrf( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cpotrf +#else + module procedure stdlib_cpotrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpotrf( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dpotrf +#else + module procedure stdlib_dpotrf +#endif +#:if WITH_QP + module procedure stdlib_qpotrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spotrf( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine spotrf +#else + module procedure stdlib_spotrf +#endif +#:if WITH_QP + module procedure stdlib_wpotrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpotrf( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zpotrf +#else + module procedure stdlib_zpotrf +#endif + end interface potrf + + + + !> POTRF2: computes the Cholesky factorization of a Hermitian + !> positive definite matrix A using the recursive algorithm. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = n/2 + !> [ A21 | A22 ] n2 = n-n1 + !> The subroutine calls itself to factor A11. Update and scale A21 + !> or A12, update A22 then calls itself to factor A22. + interface potrf2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine cpotrf2( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cpotrf2 +#else + module procedure stdlib_cpotrf2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine dpotrf2( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dpotrf2 +#else + module procedure stdlib_dpotrf2 +#endif +#:if WITH_QP + module procedure stdlib_qpotrf2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine spotrf2( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine spotrf2 +#else + module procedure stdlib_spotrf2 +#endif +#:if WITH_QP + module procedure stdlib_wpotrf2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure recursive subroutine zpotrf2( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zpotrf2 +#else + module procedure stdlib_zpotrf2 +#endif + end interface potrf2 + + + + !> POTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPOTRF. + interface potri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpotri( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine cpotri +#else + module procedure stdlib_cpotri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpotri( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dpotri +#else + module procedure stdlib_dpotri +#endif +#:if WITH_QP + module procedure stdlib_qpotri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spotri( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine spotri +#else + module procedure stdlib_spotri +#endif +#:if WITH_QP + module procedure stdlib_wpotri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpotri( uplo, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zpotri +#else + module procedure stdlib_zpotri +#endif + end interface potri + + + + !> POTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPOTRF. + interface potrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cpotrs +#else + module procedure stdlib_cpotrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dpotrs +#else + module procedure stdlib_dpotrs +#endif +#:if WITH_QP + module procedure stdlib_qpotrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine spotrs +#else + module procedure stdlib_spotrs +#endif +#:if WITH_QP + module procedure stdlib_wpotrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zpotrs +#else + module procedure stdlib_zpotrs +#endif + end interface potrs + + + + !> PPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite packed matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> CPPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface ppcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cppcon( uplo, n, ap, anorm, rcond, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine cppcon +#else + module procedure stdlib_cppcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dppcon( uplo, n, ap, anorm, rcond, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm,ap(*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dppcon +#else + module procedure stdlib_dppcon +#endif +#:if WITH_QP + module procedure stdlib_qppcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sppcon( uplo, n, ap, anorm, rcond, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm,ap(*) + real(sp), intent(out) :: rcond,work(*) + end subroutine sppcon +#else + module procedure stdlib_sppcon +#endif +#:if WITH_QP + module procedure stdlib_wppcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zppcon( uplo, n, ap, anorm, rcond, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine zppcon +#else + module procedure stdlib_zppcon +#endif + end interface ppcon + + + + !> PPEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + interface ppequ +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cppequ( uplo, n, ap, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: amax,scond,s(*) + complex(sp), intent(in) :: ap(*) + end subroutine cppequ +#else + module procedure stdlib_cppequ +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dppequ( uplo, n, ap, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: amax,scond,s(*) + real(dp), intent(in) :: ap(*) + end subroutine dppequ +#else + module procedure stdlib_dppequ +#endif +#:if WITH_QP + module procedure stdlib_qppequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sppequ( uplo, n, ap, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: amax,scond,s(*) + real(sp), intent(in) :: ap(*) + end subroutine sppequ +#else + module procedure stdlib_sppequ +#endif +#:if WITH_QP + module procedure stdlib_wppequ +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zppequ( uplo, n, ap, s, scond, amax, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: amax,scond,s(*) + complex(dp), intent(in) :: ap(*) + end subroutine zppequ +#else + module procedure stdlib_zppequ +#endif + end interface ppequ + + + + !> PPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + interface pprfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cpprfs +#else + module procedure stdlib_cpprfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(in) :: afp(*),ap(*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dpprfs +#else + module procedure stdlib_dpprfs +#endif +#:if WITH_QP + module procedure stdlib_qpprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(in) :: afp(*),ap(*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine spprfs +#else + module procedure stdlib_spprfs +#endif +#:if WITH_QP + module procedure stdlib_wpprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zpprfs +#else + module procedure stdlib_zpprfs +#endif + end interface pprfs + + + + !> PPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + interface ppsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cppsv( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(inout) :: ap(*),b(ldb,*) + end subroutine cppsv +#else + module procedure stdlib_cppsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dppsv( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: ap(*),b(ldb,*) + end subroutine dppsv +#else + module procedure stdlib_dppsv +#endif +#:if WITH_QP + module procedure stdlib_qppsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sppsv( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: ap(*),b(ldb,*) + end subroutine sppsv +#else + module procedure stdlib_sppsv +#endif +#:if WITH_QP + module procedure stdlib_wppsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zppsv( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(inout) :: ap(*),b(ldb,*) + end subroutine zppsv +#else + module procedure stdlib_zppsv +#endif + end interface ppsv + + + + !> PPTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + interface pptrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpptrf( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: ap(*) + end subroutine cpptrf +#else + module procedure stdlib_cpptrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpptrf( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: ap(*) + end subroutine dpptrf +#else + module procedure stdlib_dpptrf +#endif +#:if WITH_QP + module procedure stdlib_qpptrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spptrf( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: ap(*) + end subroutine spptrf +#else + module procedure stdlib_spptrf +#endif +#:if WITH_QP + module procedure stdlib_wpptrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpptrf( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: ap(*) + end subroutine zpptrf +#else + module procedure stdlib_zpptrf +#endif + end interface pptrf + + + + !> PPTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPPTRF. + interface pptri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpptri( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: ap(*) + end subroutine cpptri +#else + module procedure stdlib_cpptri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpptri( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: ap(*) + end subroutine dpptri +#else + module procedure stdlib_dpptri +#endif +#:if WITH_QP + module procedure stdlib_qpptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spptri( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: ap(*) + end subroutine spptri +#else + module procedure stdlib_spptri +#endif +#:if WITH_QP + module procedure stdlib_wpptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpptri( uplo, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: ap(*) + end subroutine zpptri +#else + module procedure stdlib_zpptri +#endif + end interface pptri + + + + !> PPTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**H*U or A = L*L**H computed by CPPTRF. + interface pptrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpptrs( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine cpptrs +#else + module procedure stdlib_cpptrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpptrs( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dpptrs +#else + module procedure stdlib_dpptrs +#endif +#:if WITH_QP + module procedure stdlib_qpptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spptrs( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine spptrs +#else + module procedure stdlib_spptrs +#endif +#:if WITH_QP + module procedure stdlib_wpptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpptrs( uplo, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zpptrs +#else + module procedure stdlib_zpptrs +#endif + end interface pptrs + + + + !> PSTRF: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 3 BLAS. + interface pstrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: tol + integer(ilp), intent(out) :: info,rank,piv(n) + integer(ilp), intent(in) :: lda,n + character, intent(in) :: uplo + complex(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(2*n) + end subroutine cpstrf +#else + module procedure stdlib_cpstrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: tol + integer(ilp), intent(out) :: info,rank,piv(n) + integer(ilp), intent(in) :: lda,n + character, intent(in) :: uplo + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(2*n) + end subroutine dpstrf +#else + module procedure stdlib_dpstrf +#endif +#:if WITH_QP + module procedure stdlib_qpstrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: tol + integer(ilp), intent(out) :: info,rank,piv(n) + integer(ilp), intent(in) :: lda,n + character, intent(in) :: uplo + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(2*n) + end subroutine spstrf +#else + module procedure stdlib_spstrf +#endif +#:if WITH_QP + module procedure stdlib_wpstrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: tol + integer(ilp), intent(out) :: info,rank,piv(n) + integer(ilp), intent(in) :: lda,n + character, intent(in) :: uplo + complex(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(2*n) + end subroutine zpstrf +#else + module procedure stdlib_zpstrf +#endif + end interface pstrf + + + + !> PTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite tridiagonal matrix + !> using the factorization A = L*D*L**H or A = U**H*D*U computed by + !> CPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + interface ptcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cptcon( n, d, e, anorm, rcond, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm,d(*) + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: e(*) + end subroutine cptcon +#else + module procedure stdlib_cptcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dptcon( n, d, e, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm,d(*),e(*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dptcon +#else + module procedure stdlib_dptcon +#endif +#:if WITH_QP + module procedure stdlib_qptcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sptcon( n, d, e, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm,d(*),e(*) + real(sp), intent(out) :: rcond,work(*) + end subroutine sptcon +#else + module procedure stdlib_sptcon +#endif +#:if WITH_QP + module procedure stdlib_wptcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zptcon( n, d, e, anorm, rcond, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm,d(*) + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: e(*) + end subroutine zptcon +#else + module procedure stdlib_zptcon +#endif + end interface ptcon + + + + !> PTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using SPTTRF and then calling CBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band positive definite Hermitian matrix + !> can also be found if CHETRD, CHPTRD, or CHBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to + !> tridiagonal form, however, may preclude the possibility of obtaining + !> high relative accuracy in the small eigenvalues of the original + !> matrix, if these eigenvalues range over many orders of magnitude.) + interface pteqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + end subroutine cpteqr +#else + module procedure stdlib_cpteqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: d(*),e(*),z(ldz,*) + real(dp), intent(out) :: work(*) + end subroutine dpteqr +#else + module procedure stdlib_dpteqr +#endif +#:if WITH_QP + module procedure stdlib_qpteqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: d(*),e(*),z(ldz,*) + real(sp), intent(out) :: work(*) + end subroutine spteqr +#else + module procedure stdlib_spteqr +#endif +#:if WITH_QP + module procedure stdlib_wpteqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + end subroutine zpteqr +#else + module procedure stdlib_zpteqr +#endif + end interface pteqr + + + + !> PTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + interface ptrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cptrfs( uplo, n, nrhs, d, e, df, ef, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + real(sp), intent(in) :: d(*),df(*) + complex(sp), intent(in) :: b(ldb,*),e(*),ef(*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine cptrfs +#else + module procedure stdlib_cptrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(in) :: b(ldb,*),d(*),df(*),e(*),ef(*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dptrfs +#else + module procedure stdlib_dptrfs +#endif +#:if WITH_QP + module procedure stdlib_qptrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(in) :: b(ldb,*),d(*),df(*),e(*),ef(*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine sptrfs +#else + module procedure stdlib_sptrfs +#endif +#:if WITH_QP + module procedure stdlib_wptrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zptrfs( uplo, n, nrhs, d, e, df, ef, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + real(dp), intent(in) :: d(*),df(*) + complex(dp), intent(in) :: b(ldb,*),e(*),ef(*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zptrfs +#else + module procedure stdlib_zptrfs +#endif + end interface ptrfs + + + + !> PTSV: computes the solution to a complex system of linear equations + !> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**H, and the factored form of A is then + !> used to solve the system of equations. + interface ptsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cptsv( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: d(*) + complex(sp), intent(inout) :: b(ldb,*),e(*) + end subroutine cptsv +#else + module procedure stdlib_cptsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dptsv( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: b(ldb,*),d(*),e(*) + end subroutine dptsv +#else + module procedure stdlib_dptsv +#endif +#:if WITH_QP + module procedure stdlib_qptsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sptsv( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: b(ldb,*),d(*),e(*) + end subroutine sptsv +#else + module procedure stdlib_sptsv +#endif +#:if WITH_QP + module procedure stdlib_wptsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zptsv( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: d(*) + complex(dp), intent(inout) :: b(ldb,*),e(*) + end subroutine zptsv +#else + module procedure stdlib_zptsv +#endif + end interface ptsv + + + + !> PTTRF: computes the L*D*L**H factorization of a complex Hermitian + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**H *D*U. + interface pttrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpttrf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*) + complex(sp), intent(inout) :: e(*) + end subroutine cpttrf +#else + module procedure stdlib_cpttrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpttrf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*),e(*) + end subroutine dpttrf +#else + module procedure stdlib_dpttrf +#endif +#:if WITH_QP + module procedure stdlib_qpttrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spttrf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*),e(*) + end subroutine spttrf +#else + module procedure stdlib_spttrf +#endif +#:if WITH_QP + module procedure stdlib_wpttrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpttrf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*) + complex(dp), intent(inout) :: e(*) + end subroutine zpttrf +#else + module procedure stdlib_zpttrf +#endif + end interface pttrf + + + + !> PTTRS: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + interface pttrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cpttrs( uplo, n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(in) :: d(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: e(*) + end subroutine cpttrs +#else + module procedure stdlib_cpttrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dpttrs( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*),e(*) + end subroutine dpttrs +#else + module procedure stdlib_dpttrs +#endif +#:if WITH_QP + module procedure stdlib_qpttrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine spttrs( n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*),e(*) + end subroutine spttrs +#else + module procedure stdlib_spttrs +#endif +#:if WITH_QP + module procedure stdlib_wpttrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zpttrs( uplo, n, nrhs, d, e, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(in) :: d(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: e(*) + end subroutine zpttrs +#else + module procedure stdlib_zpttrs +#endif + end interface pttrs + + + + !> ROT: applies a plane rotation, where the cos (C) is real and the + !> sin (S) is complex, and the vectors CX and CY are complex. + interface rot +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine crot( n, cx, incx, cy, incy, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(sp), intent(in) :: c + complex(sp), intent(in) :: s + complex(sp), intent(inout) :: cx(*),cy(*) + end subroutine crot +#else + module procedure stdlib_crot +#endif +#:if WITH_QP + module procedure stdlib_wrot +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zrot( n, cx, incx, cy, incy, c, s ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,incy,n + real(dp), intent(in) :: c + complex(dp), intent(in) :: s + complex(dp), intent(inout) :: cx(*),cy(*) + end subroutine zrot +#else + module procedure stdlib_zrot +#endif + end interface rot + + + + !> RSCL: multiplies an n-element real vector x by the real scalar 1/a. + !> This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + interface rscl +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine drscl( n, sa, sx, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(dp), intent(in) :: sa + real(dp), intent(inout) :: sx(*) + end subroutine drscl +#else + module procedure stdlib_drscl +#endif +#:if WITH_QP + module procedure stdlib_qrscl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine srscl( n, sa, sx, incx ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx,n + real(sp), intent(in) :: sa + real(sp), intent(inout) :: sx(*) + end subroutine srscl +#else + module procedure stdlib_srscl +#endif + end interface rscl + + + + !> SB2ST_KERNELS: is an internal routine used by the DSYTRD_SB2ST + !> subroutine. + interface sb2st_kernels +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, & + lda, v, tau, ldvt, work) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype,st,ed,sweep,n,nb,ib,lda,ldvt + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: v(*),tau(*),work(*) + end subroutine dsb2st_kernels +#else + module procedure stdlib_dsb2st_kernels +#endif +#:if WITH_QP + module procedure stdlib_qsb2st_kernels +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, & + lda, v, tau, ldvt, work) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype,st,ed,sweep,n,nb,ib,lda,ldvt + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: v(*),tau(*),work(*) + end subroutine ssb2st_kernels +#else + module procedure stdlib_ssb2st_kernels +#endif + end interface sb2st_kernels + + + + !> SBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. + interface sbev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldz,n + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dsbev +#else + module procedure stdlib_dsbev +#endif +#:if WITH_QP + module procedure stdlib_qsbev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldz,n + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine ssbev +#else + module procedure stdlib_ssbev +#endif + end interface sbev + + + + !> SBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. If eigenvectors are desired, it uses + !> a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface sbevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldz,liwork,lwork,n + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dsbevd +#else + module procedure stdlib_dsbevd +#endif +#:if WITH_QP + module procedure stdlib_qsbevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldz,liwork,lwork,n + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine ssbevd +#else + module procedure stdlib_ssbevd +#endif + end interface sbevd + + + + !> SBGST: reduces a real symmetric-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**T*S by DPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**T*A*X, where + !> X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the + !> bandwidth of A. + interface sbgst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldx,n + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(in) :: bb(ldbb,*) + real(dp), intent(out) :: work(*),x(ldx,*) + end subroutine dsbgst +#else + module procedure stdlib_dsbgst +#endif +#:if WITH_QP + module procedure stdlib_qsbgst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldx,n + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(in) :: bb(ldbb,*) + real(sp), intent(out) :: work(*),x(ldx,*) + end subroutine ssbgst +#else + module procedure stdlib_ssbgst +#endif + end interface sbgst + + + + !> SBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric + !> and banded, and B is also positive definite. + interface sbgv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,n + real(dp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dsbgv +#else + module procedure stdlib_dsbgv +#endif +#:if WITH_QP + module procedure stdlib_qsbgv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,n + real(sp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine ssbgv +#else + module procedure stdlib_ssbgv +#endif + end interface sbgv + + + + !> SBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of the + !> form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and + !> banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface sbgvd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,liwork,lwork,n + real(dp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dsbgvd +#else + module procedure stdlib_dsbgvd +#endif +#:if WITH_QP + module procedure stdlib_qsbgvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ka,kb,ldab,ldbb,ldz,liwork,lwork,n + real(sp), intent(inout) :: ab(ldab,*),bb(ldbb,*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine ssbgvd +#else + module procedure stdlib_ssbgvd +#endif + end interface sbgvd + + + + !> SBTRD: reduces a real symmetric band matrix A to symmetric + !> tridiagonal form T by an orthogonal similarity transformation: + !> Q**T * A * Q = T. + interface sbtrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldq,n + real(dp), intent(inout) :: ab(ldab,*),q(ldq,*) + real(dp), intent(out) :: d(*),e(*),work(*) + end subroutine dsbtrd +#else + module procedure stdlib_dsbtrd +#endif +#:if WITH_QP + module procedure stdlib_qsbtrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldq,n + real(sp), intent(inout) :: ab(ldab,*),q(ldq,*) + real(sp), intent(out) :: d(*),e(*),work(*) + end subroutine ssbtrd +#else + module procedure stdlib_ssbtrd +#endif + end interface sbtrd + + + + !> Level 3 BLAS like routine for C in RFP Format. + !> SFRK: performs one of the symmetric rank--k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n symmetric + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + interface sfrk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(dp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,n + character, intent(in) :: trans,transr,uplo + real(dp), intent(inout) :: c(*) + end subroutine dsfrk +#else + module procedure stdlib_dsfrk +#endif +#:if WITH_QP + module procedure stdlib_qsfrk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + real(sp), intent(in) :: alpha,beta,a(lda,*) + integer(ilp), intent(in) :: k,lda,n + character, intent(in) :: trans,transr,uplo + real(sp), intent(inout) :: c(*) + end subroutine ssfrk +#else + module procedure stdlib_ssfrk +#endif + end interface sfrk + + + + !> SPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric packed matrix A using the + !> factorization A = U*D*U**T or A = L*D*L**T computed by CSPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface spcon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cspcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine cspcon +#else + module procedure stdlib_cspcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dspcon( uplo, n, ap, ipiv, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(in) :: anorm,ap(*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dspcon +#else + module procedure stdlib_dspcon +#endif +#:if WITH_QP + module procedure stdlib_qspcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sspcon( uplo, n, ap, ipiv, anorm, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(in) :: anorm,ap(*) + real(sp), intent(out) :: rcond,work(*) + end subroutine sspcon +#else + module procedure stdlib_sspcon +#endif +#:if WITH_QP + module procedure stdlib_wspcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zspcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine zspcon +#else + module procedure stdlib_zspcon +#endif + end interface spcon + + + + !> SPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A in packed storage. + interface spev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dspev( jobz, uplo, n, ap, w, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dspev +#else + module procedure stdlib_dspev +#endif +#:if WITH_QP + module procedure stdlib_qspev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sspev( jobz, uplo, n, ap, w, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sspev +#else + module procedure stdlib_sspev +#endif + end interface spev + + + + !> SPEVD: computes all the eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface spevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dspevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,iwork, liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dspevd +#else + module procedure stdlib_dspevd +#endif +#:if WITH_QP + module procedure stdlib_qspevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sspevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,iwork, liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sspevd +#else + module procedure stdlib_sspevd +#endif + end interface spevd + + + + !> SPGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by DPPTRF. + interface spgst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dspgst( itype, uplo, n, ap, bp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,n + real(dp), intent(inout) :: ap(*) + real(dp), intent(in) :: bp(*) + end subroutine dspgst +#else + module procedure stdlib_dspgst +#endif +#:if WITH_QP + module procedure stdlib_qspgst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sspgst( itype, uplo, n, ap, bp, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,n + real(sp), intent(inout) :: ap(*) + real(sp), intent(in) :: bp(*) + end subroutine sspgst +#else + module procedure stdlib_sspgst +#endif + end interface spgst + + + + !> SPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric, stored in packed format, + !> and B is also positive definite. + interface spgv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dspgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,ldz,n + real(dp), intent(inout) :: ap(*),bp(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dspgv +#else + module procedure stdlib_dspgv +#endif +#:if WITH_QP + module procedure stdlib_qspgv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sspgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,ldz,n + real(sp), intent(inout) :: ap(*),bp(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sspgv +#else + module procedure stdlib_sspgv +#endif + end interface spgv + + + + !> SPGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface spgvd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dspgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,ldz,liwork,lwork,n + real(dp), intent(inout) :: ap(*),bp(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dspgvd +#else + module procedure stdlib_dspgvd +#endif +#:if WITH_QP + module procedure stdlib_qspgvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine sspgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,ldz,liwork,lwork,n + real(sp), intent(inout) :: ap(*),bp(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sspgvd +#else + module procedure stdlib_sspgvd +#endif + end interface spgvd + + + + !> SPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + interface spmv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cspmv( uplo, n, alpha, ap, x, incx, beta, y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,incy,n + complex(sp), intent(in) :: alpha,beta,ap(*),x(*) + complex(sp), intent(inout) :: y(*) + end subroutine cspmv +#else + module procedure stdlib_cspmv +#endif +#:if WITH_QP + module procedure stdlib_wspmv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zspmv( uplo, n, alpha, ap, x, incx, beta, y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,incy,n + complex(dp), intent(in) :: alpha,beta,ap(*),x(*) + complex(dp), intent(inout) :: y(*) + end subroutine zspmv +#else + module procedure stdlib_zspmv +#endif + end interface spmv + + + + !> SPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + interface spr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cspr( uplo, n, alpha, x, incx, ap ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,n + complex(sp), intent(in) :: alpha,x(*) + complex(sp), intent(inout) :: ap(*) + end subroutine cspr +#else + module procedure stdlib_cspr +#endif +#:if WITH_QP + module procedure stdlib_wspr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zspr( uplo, n, alpha, x, incx, ap ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,n + complex(dp), intent(in) :: alpha,x(*) + complex(dp), intent(inout) :: ap(*) + end subroutine zspr +#else + module procedure stdlib_zspr +#endif + end interface spr + + + + !> SPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + interface sprfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine csprfs +#else + module procedure stdlib_csprfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(in) :: afp(*),ap(*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dsprfs +#else + module procedure stdlib_dsprfs +#endif +#:if WITH_QP + module procedure stdlib_qsprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(in) :: afp(*),ap(*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine ssprfs +#else + module procedure stdlib_ssprfs +#endif +#:if WITH_QP + module procedure stdlib_wsprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: afp(*),ap(*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zsprfs +#else + module procedure stdlib_zsprfs +#endif + end interface sprfs + + + + !> SPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is symmetric and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + interface spsv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(inout) :: ap(*),b(ldb,*) + end subroutine cspsv +#else + module procedure stdlib_cspsv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(inout) :: ap(*),b(ldb,*) + end subroutine dspsv +#else + module procedure stdlib_dspsv +#endif +#:if WITH_QP + module procedure stdlib_qspsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(inout) :: ap(*),b(ldb,*) + end subroutine sspsv +#else + module procedure stdlib_sspsv +#endif +#:if WITH_QP + module procedure stdlib_wspsv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(inout) :: ap(*),b(ldb,*) + end subroutine zspsv +#else + module procedure stdlib_zspsv +#endif + end interface spsv + + + + !> SPTRD: reduces a real symmetric matrix A stored in packed form to + !> symmetric tridiagonal form T by an orthogonal similarity + !> transformation: Q**T * A * Q = T. + interface sptrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsptrd( uplo, n, ap, d, e, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: d(*),e(*),tau(*) + end subroutine dsptrd +#else + module procedure stdlib_dsptrd +#endif +#:if WITH_QP + module procedure stdlib_qsptrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssptrd( uplo, n, ap, d, e, tau, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: d(*),e(*),tau(*) + end subroutine ssptrd +#else + module procedure stdlib_ssptrd +#endif + end interface sptrd + + + + !> SPTRF: computes the factorization of a complex symmetric matrix A + !> stored in packed format using the Bunch-Kaufman diagonal pivoting + !> method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + interface sptrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: ap(*) + end subroutine csptrf +#else + module procedure stdlib_csptrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: ap(*) + end subroutine dsptrf +#else + module procedure stdlib_dsptrf +#endif +#:if WITH_QP + module procedure stdlib_qsptrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: ap(*) + end subroutine ssptrf +#else + module procedure stdlib_ssptrf +#endif +#:if WITH_QP + module procedure stdlib_wsptrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsptrf( uplo, n, ap, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: ap(*) + end subroutine zsptrf +#else + module procedure stdlib_zsptrf +#endif + end interface sptrf + + + + !> SPTRI: computes the inverse of a complex symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSPTRF. + interface sptri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine csptri +#else + module procedure stdlib_csptri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: work(*) + end subroutine dsptri +#else + module procedure stdlib_dsptri +#endif +#:if WITH_QP + module procedure stdlib_qsptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: work(*) + end subroutine ssptri +#else + module procedure stdlib_ssptri +#endif +#:if WITH_QP + module procedure stdlib_wsptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsptri( uplo, n, ap, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,ipiv(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine zsptri +#else + module procedure stdlib_zsptri +#endif + end interface sptri + + + + !> SPTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSPTRF. + interface sptrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine csptrs +#else + module procedure stdlib_csptrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dsptrs +#else + module procedure stdlib_dsptrs +#endif +#:if WITH_QP + module procedure stdlib_qsptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine ssptrs +#else + module procedure stdlib_ssptrs +#endif +#:if WITH_QP + module procedure stdlib_wsptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zsptrs +#else + module procedure stdlib_zsptrs +#endif + end interface sptrs + + + + !> STEBZ: computes the eigenvalues of a symmetric tridiagonal + !> matrix T. The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + interface stebz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstebz( range, order, n, vl, vu, il, iu, abstol, d, e,m, nsplit, w,& + iblock, isplit, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: order,range + integer(ilp), intent(in) :: il,iu,n + integer(ilp), intent(out) :: info,m,nsplit,iblock(*),isplit(*),iwork(*) + + real(dp), intent(in) :: abstol,vl,vu,d(*),e(*) + real(dp), intent(out) :: w(*),work(*) + end subroutine dstebz +#else + module procedure stdlib_dstebz +#endif +#:if WITH_QP + module procedure stdlib_qstebz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstebz( range, order, n, vl, vu, il, iu, abstol, d, e,m, nsplit, w,& + iblock, isplit, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: order,range + integer(ilp), intent(in) :: il,iu,n + integer(ilp), intent(out) :: info,m,nsplit,iblock(*),isplit(*),iwork(*) + + real(sp), intent(in) :: abstol,vl,vu,d(*),e(*) + real(sp), intent(out) :: w(*),work(*) + end subroutine sstebz +#else + module procedure stdlib_sstebz +#endif + end interface stebz + + + + !> STEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See SLAED3 for details. + interface stedc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cstedc( compz, n, d, e, z, ldz, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lrwork,lwork,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + end subroutine cstedc +#else + module procedure stdlib_cstedc +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstedc( compz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(dp), intent(inout) :: d(*),e(*),z(ldz,*) + real(dp), intent(out) :: work(*) + end subroutine dstedc +#else + module procedure stdlib_dstedc +#endif +#:if WITH_QP + module procedure stdlib_qstedc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstedc( compz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(sp), intent(inout) :: d(*),e(*),z(ldz,*) + real(sp), intent(out) :: work(*) + end subroutine sstedc +#else + module procedure stdlib_sstedc +#endif +#:if WITH_QP + module procedure stdlib_wstedc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zstedc( compz, n, d, e, z, ldz, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lrwork,lwork,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + end subroutine zstedc +#else + module procedure stdlib_zstedc +#endif + end interface stedc + + + + !> STEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> STEGR is a compatibility wrapper around the improved CSTEMR routine. + !> See SSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : STEGR and CSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + interface stegr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: abstol,vl,vu + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: w(*),work(*) + complex(sp), intent(out) :: z(ldz,*) + end subroutine cstegr +#else + module procedure stdlib_cstegr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: abstol,vl,vu + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dstegr +#else + module procedure stdlib_dstegr +#endif +#:if WITH_QP + module procedure stdlib_qstegr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: abstol,vl,vu + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sstegr +#else + module procedure stdlib_sstegr +#endif +#:if WITH_QP + module procedure stdlib_wstegr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: abstol,vl,vu + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: w(*),work(*) + complex(dp), intent(out) :: z(ldz,*) + end subroutine zstegr +#else + module procedure stdlib_zstegr +#endif + end interface stegr + + + + !> STEIN: computes the eigenvectors of a real symmetric tridiagonal + !> matrix T corresponding to specified eigenvalues, using inverse + !> iteration. + !> The maximum number of iterations allowed for each eigenvector is + !> specified by an internal parameter MAXITS (currently set to 5). + !> Although the eigenvectors are real, they are stored in a complex + !> array, which may be passed to CUNMTR or CUPMTR for back + !> transformation to the eigenvectors of a complex Hermitian matrix + !> which was reduced to tridiagonal form. + interface stein +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ifail(*),iwork(*) + integer(ilp), intent(in) :: ldz,m,n,iblock(*),isplit(*) + real(sp), intent(in) :: d(*),e(*),w(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(out) :: z(ldz,*) + end subroutine cstein +#else + module procedure stdlib_cstein +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ifail(*),iwork(*) + integer(ilp), intent(in) :: ldz,m,n,iblock(*),isplit(*) + real(dp), intent(in) :: d(*),e(*),w(*) + real(dp), intent(out) :: work(*),z(ldz,*) + end subroutine dstein +#else + module procedure stdlib_dstein +#endif +#:if WITH_QP + module procedure stdlib_qstein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ifail(*),iwork(*) + integer(ilp), intent(in) :: ldz,m,n,iblock(*),isplit(*) + real(sp), intent(in) :: d(*),e(*),w(*) + real(sp), intent(out) :: work(*),z(ldz,*) + end subroutine sstein +#else + module procedure stdlib_sstein +#endif +#:if WITH_QP + module procedure stdlib_wstein +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info,ifail(*),iwork(*) + integer(ilp), intent(in) :: ldz,m,n,iblock(*),isplit(*) + real(dp), intent(in) :: d(*),e(*),w(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(out) :: z(ldz,*) + end subroutine zstein +#else + module procedure stdlib_zstein +#endif + end interface stein + + + + !> STEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.STEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + !> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to + !> real symmetric tridiagonal form. + !> (Any complex Hermitean tridiagonal matrix has real values on its diagonal + !> and potentially complex numbers on its off-diagonals. By applying a + !> similarity transform with an appropriate diagonal matrix + !> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean + !> matrix can be transformed into a real symmetric matrix and complex + !> arithmetic can be entirely avoided.) + !> While the eigenvectors of the real symmetric tridiagonal matrix are real, + !> the eigenvectors of original complex Hermitean matrix have complex entries + !> in general. + !> Since LAPACK drivers overwrite the matrix data with the eigenvectors, + !> STEMR accepts complex workspace to facilitate interoperability + !> with CUNMTR or CUPMTR. + interface stemr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il,iu,ldz,nzc,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: vl,vu + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: w(*),work(*) + complex(sp), intent(out) :: z(ldz,*) + end subroutine cstemr +#else + module procedure stdlib_cstemr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il,iu,ldz,nzc,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: vl,vu + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dstemr +#else + module procedure stdlib_dstemr +#endif +#:if WITH_QP + module procedure stdlib_qstemr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il,iu,ldz,nzc,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: vl,vu + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sstemr +#else + module procedure stdlib_sstemr +#endif +#:if WITH_QP + module procedure stdlib_wstemr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il,iu,ldz,nzc,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: vl,vu + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: w(*),work(*) + complex(dp), intent(out) :: z(ldz,*) + end subroutine zstemr +#else + module procedure stdlib_zstemr +#endif + end interface stemr + + + + !> STEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the implicit QL or QR method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + !> matrix to tridiagonal form. + interface steqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + end subroutine csteqr +#else + module procedure stdlib_csteqr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: d(*),e(*),z(ldz,*) + real(dp), intent(out) :: work(*) + end subroutine dsteqr +#else + module procedure stdlib_dsteqr +#endif +#:if WITH_QP + module procedure stdlib_qsteqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: d(*),e(*),z(ldz,*) + real(sp), intent(out) :: work(*) + end subroutine ssteqr +#else + module procedure stdlib_ssteqr +#endif +#:if WITH_QP + module procedure stdlib_wsteqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsteqr( compz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + end subroutine zsteqr +#else + module procedure stdlib_zsteqr +#endif + end interface steqr + + + + !> STERF: computes all eigenvalues of a symmetric tridiagonal matrix + !> using the Pal-Walker-Kahan variant of the QL or QR algorithm. + interface sterf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsterf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: d(*),e(*) + end subroutine dsterf +#else + module procedure stdlib_dsterf +#endif +#:if WITH_QP + module procedure stdlib_qsterf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssterf( n, d, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: d(*),e(*) + end subroutine ssterf +#else + module procedure stdlib_ssterf +#endif + end interface sterf + + + + !> STEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix A. + interface stev +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstev( jobz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: work(*),z(ldz,*) + end subroutine dstev +#else + module procedure stdlib_dstev +#endif +#:if WITH_QP + module procedure stdlib_qstev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstev( jobz, n, d, e, z, ldz, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: work(*),z(ldz,*) + end subroutine sstev +#else + module procedure stdlib_sstev +#endif + end interface stev + + + + !> STEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface stevd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstevd( jobz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: work(*),z(ldz,*) + end subroutine dstevd +#else + module procedure stdlib_dstevd +#endif +#:if WITH_QP + module procedure stdlib_qstevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstevd( jobz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldz,liwork,lwork,n + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: work(*),z(ldz,*) + end subroutine sstevd +#else + module procedure stdlib_sstevd +#endif + end interface stevd + + + + !> STEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Eigenvalues and + !> eigenvectors can be selected by specifying either a range of values + !> or a range of indices for the desired eigenvalues. + !> Whenever possible, STEVR calls DSTEMR to compute the + !> eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. For the i-th + !> unreduced block of T, + !> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T + !> is a relatively robust representation, + !> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high + !> relative accuracy by the dqds algorithm, + !> (c) If there is a cluster of close eigenvalues, "choose" sigma_i + !> close to the cluster, and go to step (a), + !> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, + !> compute the corresponding eigenvector by forming a + !> rank-revealing twisted factorization. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, + !> Computer Science Division Technical Report No. UCB//CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : STEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> STEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + interface stevr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dstevr( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: abstol,vl,vu + real(dp), intent(inout) :: d(*),e(*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dstevr +#else + module procedure stdlib_dstevr +#endif +#:if WITH_QP + module procedure stdlib_qstevr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine sstevr( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range + integer(ilp), intent(in) :: il,iu,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: abstol,vl,vu + real(sp), intent(inout) :: d(*),e(*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine sstevr +#else + module procedure stdlib_sstevr +#endif + end interface stevr + + + + !> SYCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSYTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface sycon +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csycon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csycon +#else + module procedure stdlib_csycon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsycon( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm,a(lda,*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dsycon +#else + module procedure stdlib_dsycon +#endif +#:if WITH_QP + module procedure stdlib_qsycon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssycon( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm,a(lda,*) + real(sp), intent(out) :: rcond,work(*) + end subroutine ssycon +#else + module procedure stdlib_ssycon +#endif +#:if WITH_QP + module procedure stdlib_wsycon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsycon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsycon +#else + module procedure stdlib_zsycon +#endif + end interface sycon + + + + !> SYCON_ROOK: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSYTRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + interface sycon_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csycon_rook +#else + module procedure stdlib_csycon_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm,a(lda,*) + real(dp), intent(out) :: rcond,work(*) + end subroutine dsycon_rook +#else + module procedure stdlib_dsycon_rook +#endif +#:if WITH_QP + module procedure stdlib_qsycon_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(in) :: anorm,a(lda,*) + real(sp), intent(out) :: rcond,work(*) + end subroutine ssycon_rook +#else + module procedure stdlib_ssycon_rook +#endif +#:if WITH_QP + module procedure stdlib_wsycon_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsycon_rook +#else + module procedure stdlib_zsycon_rook +#endif + end interface sycon_rook + + + + !> SYCONV: convert A given by TRF into L and D and vice-versa. + !> Get Non-diag elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + interface syconv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyconv( uplo, way, n, a, lda, ipiv, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + end subroutine csyconv +#else + module procedure stdlib_csyconv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyconv( uplo, way, n, a, lda, ipiv, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*) + end subroutine dsyconv +#else + module procedure stdlib_dsyconv +#endif +#:if WITH_QP + module procedure stdlib_qsyconv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyconv( uplo, way, n, a, lda, ipiv, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*) + end subroutine ssyconv +#else + module procedure stdlib_ssyconv +#endif +#:if WITH_QP + module procedure stdlib_wsyconv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyconv( uplo, way, n, a, lda, ipiv, e, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + end subroutine zsyconv +#else + module procedure stdlib_zsyconv +#endif + end interface syconv + + + + !> If parameter WAY = 'C': + !> SYCONVF: converts the factorization output format used in + !> CSYTRF provided on entry in parameter A into the factorization + !> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in CSYTRF into + !> the format used in CSYTRF_RK (or CSYTRF_BK). + !> If parameter WAY = 'R': + !> SYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in CSYTRF_RK + !> (or CSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in CSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in CSYTRF_RK + !> (or CSYTRF_BK) into the format used in CSYTRF. + !> SYCONVF can also convert in Hermitian matrix case, i.e. between + !> formats used in CHETRF and CHETRF_RK (or CHETRF_BK). + interface syconvf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyconvf( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + integer(ilp), intent(inout) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*),e(*) + end subroutine csyconvf +#else + module procedure stdlib_csyconvf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*),e(*) + end subroutine dsyconvf +#else + module procedure stdlib_dsyconvf +#endif +#:if WITH_QP + module procedure stdlib_qsyconvf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyconvf( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*),e(*) + end subroutine ssyconvf +#else + module procedure stdlib_ssyconvf +#endif +#:if WITH_QP + module procedure stdlib_wsyconvf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + integer(ilp), intent(inout) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*),e(*) + end subroutine zsyconvf +#else + module procedure stdlib_zsyconvf +#endif + end interface syconvf + + + + !> If parameter WAY = 'C': + !> SYCONVF_ROOK: converts the factorization output format used in + !> CSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for CSYTRF_ROOK and + !> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> SYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in CSYTRF_RK + !> (or CSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in CSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for CSYTRF_ROOK and + !> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. + !> SYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between + !> formats used in CHETRF_ROOK and CHETRF_RK (or CHETRF_BK). + interface syconvf_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*),e(*) + end subroutine csyconvf_rook +#else + module procedure stdlib_csyconvf_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*),e(*) + end subroutine dsyconvf_rook +#else + module procedure stdlib_dsyconvf_rook +#endif +#:if WITH_QP + module procedure stdlib_qsyconvf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*),e(*) + end subroutine ssyconvf_rook +#else + module procedure stdlib_ssyconvf_rook +#endif +#:if WITH_QP + module procedure stdlib_wsyconvf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo,way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*),e(*) + end subroutine zsyconvf_rook +#else + module procedure stdlib_zsyconvf_rook +#endif + end interface syconvf_rook + + + + !> SYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + interface syequb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*) + character, intent(in) :: uplo + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csyequb +#else + module procedure stdlib_csyequb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*),work(*) + character, intent(in) :: uplo + real(dp), intent(in) :: a(lda,*) + end subroutine dsyequb +#else + module procedure stdlib_dsyequb +#endif +#:if WITH_QP + module procedure stdlib_qsyequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: amax,scond,s(*),work(*) + character, intent(in) :: uplo + real(sp), intent(in) :: a(lda,*) + end subroutine ssyequb +#else + module procedure stdlib_ssyequb +#endif +#:if WITH_QP + module procedure stdlib_wsyequb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: amax,scond,s(*) + character, intent(in) :: uplo + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsyequb +#else + module procedure stdlib_zsyequb +#endif + end interface syequb + + + + !> SYEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. + interface syev +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsyev( jobz, uplo, n, a, lda, w, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*),work(*) + end subroutine dsyev +#else + module procedure stdlib_dsyev +#endif +#:if WITH_QP + module procedure stdlib_qsyev +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssyev( jobz, uplo, n, a, lda, w, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*),work(*) + end subroutine ssyev +#else + module procedure stdlib_ssyev +#endif + end interface syev + + + + !> SYEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> Because of large use of BLAS of level 3, SYEVD needs N**2 more + !> workspace than DSYEVX. + interface syevd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsyevd( jobz, uplo, n, a, lda, w, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,liwork,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*),work(*) + end subroutine dsyevd +#else + module procedure stdlib_dsyevd +#endif +#:if WITH_QP + module procedure stdlib_qsyevd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssyevd( jobz, uplo, n, a, lda, w, work, lwork, iwork,liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,liwork,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*),work(*) + end subroutine ssyevd +#else + module procedure stdlib_ssyevd +#endif + end interface syevd + + + + !> SYEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> SYEVR first reduces the matrix A to tridiagonal form T with a call + !> to DSYTRD. Then, whenever possible, SYEVR calls DSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see DSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : SYEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> SYEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + interface syevr +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsyevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, & + ldz, isuppz, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range,uplo + integer(ilp), intent(in) :: il,iu,lda,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(dp), intent(in) :: abstol,vl,vu + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine dsyevr +#else + module procedure stdlib_dsyevr +#endif +#:if WITH_QP + module procedure stdlib_qsyevr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssyevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, & + ldz, isuppz, work, lwork,iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,range,uplo + integer(ilp), intent(in) :: il,iu,lda,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,isuppz(*),iwork(*) + real(sp), intent(in) :: abstol,vl,vu + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*),work(*),z(ldz,*) + end subroutine ssyevr +#else + module procedure stdlib_ssyevr +#endif + end interface syevr + + + + !> SYGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by DPOTRF. + interface sygst +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsygst( itype, uplo, n, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: b(ldb,*) + end subroutine dsygst +#else + module procedure stdlib_dsygst +#endif +#:if WITH_QP + module procedure stdlib_qsygst +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssygst( itype, uplo, n, a, lda, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: b(ldb,*) + end subroutine ssygst +#else + module procedure stdlib_ssygst +#endif + end interface sygst + + + + !> SYGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric and B is also + !> positive definite. + interface sygv +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsygv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,lwork,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: w(*),work(*) + end subroutine dsygv +#else + module procedure stdlib_dsygv +#endif +#:if WITH_QP + module procedure stdlib_qsygv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssygv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype,lda,ldb,lwork,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: w(*),work(*) + end subroutine ssygv +#else + module procedure stdlib_ssygv +#endif + end interface sygv + + + + !> SYGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + interface sygvd +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dsygvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,lda,ldb,liwork,lwork,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: w(*),work(*) + end subroutine dsygvd +#else + module procedure stdlib_dsygvd +#endif +#:if WITH_QP + module procedure stdlib_qsygvd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ssygvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, iwork, & + liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobz,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: itype,lda,ldb,liwork,lwork,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: w(*),work(*) + end subroutine ssygvd +#else + module procedure stdlib_ssygvd +#endif + end interface sygvd + + + + !> SYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + interface symv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csymv( uplo, n, alpha, a, lda, x, incx, beta, y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,incy,lda,n + complex(sp), intent(in) :: alpha,beta,a(lda,*),x(*) + complex(sp), intent(inout) :: y(*) + end subroutine csymv +#else + module procedure stdlib_csymv +#endif +#:if WITH_QP + module procedure stdlib_wsymv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsymv( uplo, n, alpha, a, lda, x, incx, beta, y, incy ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,incy,lda,n + complex(dp), intent(in) :: alpha,beta,a(lda,*),x(*) + complex(dp), intent(inout) :: y(*) + end subroutine zsymv +#else + module procedure stdlib_zsymv +#endif + end interface symv + + + + !> SYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + interface syr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyr( uplo, n, alpha, x, incx, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,lda,n + complex(sp), intent(in) :: alpha,x(*) + complex(sp), intent(inout) :: a(lda,*) + end subroutine csyr +#else + module procedure stdlib_csyr +#endif +#:if WITH_QP + module procedure stdlib_wsyr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyr( uplo, n, alpha, x, incx, a, lda ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx,lda,n + complex(dp), intent(in) :: alpha,x(*) + complex(dp), intent(inout) :: a(lda,*) + end subroutine zsyr +#else + module procedure stdlib_zsyr +#endif + end interface syr + + + + !> SYRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite, and + !> provides error bounds and backward error estimates for the solution. + interface syrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + end subroutine csyrfs +#else + module procedure stdlib_csyrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + real(dp), intent(inout) :: x(ldx,*) + end subroutine dsyrfs +#else + module procedure stdlib_dsyrfs +#endif +#:if WITH_QP + module procedure stdlib_qsyrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + real(sp), intent(inout) :: x(ldx,*) + end subroutine ssyrfs +#else + module procedure stdlib_ssyrfs +#endif +#:if WITH_QP + module procedure stdlib_wsyrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr,& + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldaf,ldb,ldx,n,nrhs,ipiv(*) + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: a(lda,*),af(ldaf,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + end subroutine zsyrfs +#else + module procedure stdlib_zsyrfs +#endif + end interface syrfs + + + + !> SYSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + interface sysv +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine csysv +#else + module procedure stdlib_csysv +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dsysv +#else + module procedure stdlib_dsysv +#endif +#:if WITH_QP + module procedure stdlib_qsysv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine ssysv +#else + module procedure stdlib_ssysv +#endif +#:if WITH_QP + module procedure stdlib_wsysv +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zsysv +#else + module procedure stdlib_zsysv +#endif + end interface sysv + + + + !> CSYSV computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + interface sysv_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine csysv_aa +#else + module procedure stdlib_csysv_aa +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dsysv_aa +#else + module procedure stdlib_dsysv_aa +#endif +#:if WITH_QP + module procedure stdlib_qsysv_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine ssysv_aa +#else + module procedure stdlib_ssysv_aa +#endif +#:if WITH_QP + module procedure stdlib_wsysv_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zsysv_aa +#else + module procedure stdlib_zsysv_aa +#endif + end interface sysv_aa + + + + !> SYSV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> CSYTRF_RK is called to compute the factorization of a complex + !> symmetric matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine CSYTRS_3. + interface sysv_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: e(*),work(*) + end subroutine csysv_rk +#else + module procedure stdlib_csysv_rk +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: e(*),work(*) + end subroutine dsysv_rk +#else + module procedure stdlib_dsysv_rk +#endif +#:if WITH_QP + module procedure stdlib_qsysv_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: e(*),work(*) + end subroutine ssysv_rk +#else + module procedure stdlib_ssysv_rk +#endif +#:if WITH_QP + module procedure stdlib_wsysv_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: e(*),work(*) + end subroutine zsysv_rk +#else + module procedure stdlib_zsysv_rk +#endif + end interface sysv_rk + + + + !> SYSV_ROOK: computes the solution to a complex system of linear + !> equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> CSYTRF_ROOK is called to compute the factorization of a complex + !> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling CSYTRS_ROOK. + interface sysv_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine csysv_rook +#else + module procedure stdlib_csysv_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dsysv_rook +#else + module procedure stdlib_dsysv_rook +#endif +#:if WITH_QP + module procedure stdlib_qsysv_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine ssysv_rook +#else + module procedure stdlib_ssysv_rook +#endif +#:if WITH_QP + module procedure stdlib_wsysv_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,ldb,lwork,n,nrhs + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zsysv_rook +#else + module procedure stdlib_zsysv_rook +#endif + end interface sysv_rook + + + + !> SYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + interface syswapr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csyswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + complex(sp), intent(inout) :: a(lda,n) + end subroutine csyswapr +#else + module procedure stdlib_csyswapr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsyswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + real(dp), intent(inout) :: a(lda,n) + end subroutine dsyswapr +#else + module procedure stdlib_dsyswapr +#endif +#:if WITH_QP + module procedure stdlib_qsyswapr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssyswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + real(sp), intent(inout) :: a(lda,n) + end subroutine ssyswapr +#else + module procedure stdlib_ssyswapr +#endif +#:if WITH_QP + module procedure stdlib_wsyswapr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsyswapr( uplo, n, a, lda, i1, i2) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1,i2,lda,n + complex(dp), intent(inout) :: a(lda,n) + end subroutine zsyswapr +#else + module procedure stdlib_zsyswapr +#endif + end interface syswapr + + + + !> SYTF2_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + interface sytf2_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + end subroutine csytf2_rk +#else + module procedure stdlib_csytf2_rk +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*) + end subroutine dsytf2_rk +#else + module procedure stdlib_dsytf2_rk +#endif +#:if WITH_QP + module procedure stdlib_qsytf2_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*) + end subroutine ssytf2_rk +#else + module procedure stdlib_ssytf2_rk +#endif +#:if WITH_QP + module procedure stdlib_wsytf2_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + end subroutine zsytf2_rk +#else + module procedure stdlib_zsytf2_rk +#endif + end interface sytf2_rk + + + + !> SYTF2_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + interface sytf2_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine csytf2_rook +#else + module procedure stdlib_csytf2_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dsytf2_rook +#else + module procedure stdlib_dsytf2_rook +#endif +#:if WITH_QP + module procedure stdlib_qsytf2_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine ssytf2_rook +#else + module procedure stdlib_ssytf2_rook +#endif +#:if WITH_QP + module procedure stdlib_wsytf2_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytf2_rook( uplo, n, a, lda, ipiv, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine zsytf2_rook +#else + module procedure stdlib_zsytf2_rook +#endif + end interface sytf2_rook + + + + !> SYTRD: reduces a real symmetric matrix A to real symmetric + !> tridiagonal form T by an orthogonal similarity transformation: + !> Q**T * A * Q = T. + interface sytrd +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*),e(*),tau(*),work(*) + end subroutine dsytrd +#else + module procedure stdlib_dsytrd +#endif +#:if WITH_QP + module procedure stdlib_qsytrd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*),e(*),tau(*),work(*) + end subroutine ssytrd +#else + module procedure stdlib_ssytrd +#endif + end interface sytrd + + + + !> SYTRD_SB2ST: reduces a real symmetric band matrix A to real symmetric + !> tridiagonal form T by a orthogonal similarity transformation: + !> Q**T * A * Q = T. + interface sytrd_sb2st +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrd_sb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, & + lhous, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: stage1,uplo,vect + integer(ilp), intent(in) :: n,kd,ldab,lhous,lwork + integer(ilp), intent(out) :: info + real(dp), intent(out) :: d(*),e(*),hous(*),work(*) + real(dp), intent(inout) :: ab(ldab,*) + end subroutine dsytrd_sb2st +#else + module procedure stdlib_dsytrd_sb2st +#endif +#:if WITH_QP + module procedure stdlib_qsytrd_sb2st +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrd_sb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, & + lhous, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: stage1,uplo,vect + integer(ilp), intent(in) :: n,kd,ldab,lhous,lwork + integer(ilp), intent(out) :: info + real(sp), intent(out) :: d(*),e(*),hous(*),work(*) + real(sp), intent(inout) :: ab(ldab,*) + end subroutine ssytrd_sb2st +#else + module procedure stdlib_ssytrd_sb2st +#endif + end interface sytrd_sb2st + + + + !> SYTRD_SY2SB: reduces a real symmetric matrix A to real symmetric + !> band-diagonal form AB by a orthogonal similarity transformation: + !> Q**T * A * Q = AB. + interface sytrd_sy2sb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrd_sy2sb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldab,lwork,n,kd + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: ab(ldab,*),tau(*),work(*) + end subroutine dsytrd_sy2sb +#else + module procedure stdlib_dsytrd_sy2sb +#endif +#:if WITH_QP + module procedure stdlib_qsytrd_sy2sb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrd_sy2sb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info & + ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldab,lwork,n,kd + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: ab(ldab,*),tau(*),work(*) + end subroutine ssytrd_sy2sb +#else + module procedure stdlib_ssytrd_sy2sb +#endif + end interface sytrd_sy2sb + + + + !> SYTRF: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface sytrf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csytrf +#else + module procedure stdlib_csytrf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dsytrf +#else + module procedure stdlib_dsytrf +#endif +#:if WITH_QP + module procedure stdlib_qsytrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine ssytrf +#else + module procedure stdlib_ssytrf +#endif +#:if WITH_QP + module procedure stdlib_wsytrf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytrf +#else + module procedure stdlib_zsytrf +#endif + end interface sytrf + + + + !> SYTRF_AA: computes the factorization of a complex symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a complex symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface sytrf_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csytrf_aa +#else + module procedure stdlib_csytrf_aa +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dsytrf_aa +#else + module procedure stdlib_dsytrf_aa +#endif +#:if WITH_QP + module procedure stdlib_qsytrf_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine ssytrf_aa +#else + module procedure stdlib_ssytrf_aa +#endif +#:if WITH_QP + module procedure stdlib_wsytrf_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,lda,lwork + integer(ilp), intent(out) :: info,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytrf_aa +#else + module procedure stdlib_zsytrf_aa +#endif + end interface sytrf_aa + + + + !> SYTRF_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + interface sytrf_rk +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*),work(*) + end subroutine csytrf_rk +#else + module procedure stdlib_csytrf_rk +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*),work(*) + end subroutine dsytrf_rk +#else + module procedure stdlib_dsytrf_rk +#endif +#:if WITH_QP + module procedure stdlib_qsytrf_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*),work(*) + end subroutine ssytrf_rk +#else + module procedure stdlib_ssytrf_rk +#endif +#:if WITH_QP + module procedure stdlib_wsytrf_rk +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*),work(*) + end subroutine zsytrf_rk +#else + module procedure stdlib_zsytrf_rk +#endif + end interface sytrf_rk + + + + !> SYTRF_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + interface sytrf_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csytrf_rook +#else + module procedure stdlib_csytrf_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dsytrf_rook +#else + module procedure stdlib_dsytrf_rook +#endif +#:if WITH_QP + module procedure stdlib_qsytrf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine ssytrf_rook +#else + module procedure stdlib_ssytrf_rook +#endif +#:if WITH_QP + module procedure stdlib_wsytrf_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info,ipiv(*) + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytrf_rook +#else + module procedure stdlib_zsytrf_rook +#endif + end interface sytrf_rook + + + + !> SYTRI: computes the inverse of a complex symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> CSYTRF. + interface sytri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csytri +#else + module procedure stdlib_csytri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dsytri +#else + module procedure stdlib_dsytri +#endif +#:if WITH_QP + module procedure stdlib_qsytri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine ssytri +#else + module procedure stdlib_ssytri +#endif +#:if WITH_QP + module procedure stdlib_wsytri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytri( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytri +#else + module procedure stdlib_zsytri +#endif + end interface sytri + + + + !> SYTRI_ROOK: computes the inverse of a complex symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by CSYTRF_ROOK. + interface sytri_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine csytri_rook +#else + module procedure stdlib_csytri_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + end subroutine dsytri_rook +#else + module procedure stdlib_dsytri_rook +#endif +#:if WITH_QP + module procedure stdlib_qsytri_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + end subroutine ssytri_rook +#else + module procedure stdlib_ssytri_rook +#endif +#:if WITH_QP + module procedure stdlib_wsytri_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytri_rook( uplo, n, a, lda, ipiv, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n,ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytri_rook +#else + module procedure stdlib_zsytri_rook +#endif + end interface sytri_rook + + + + !> SYTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF. + interface sytrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine csytrs +#else + module procedure stdlib_csytrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dsytrs +#else + module procedure stdlib_dsytrs +#endif +#:if WITH_QP + module procedure stdlib_qsytrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine ssytrs +#else + module procedure stdlib_ssytrs +#endif +#:if WITH_QP + module procedure stdlib_wsytrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zsytrs +#else + module procedure stdlib_zsytrs +#endif + end interface sytrs + + + + !> SYTRS2: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF and converted by CSYCONV. + interface sytrs2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine csytrs2 +#else + module procedure stdlib_csytrs2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dsytrs2 +#else + module procedure stdlib_dsytrs2 +#endif +#:if WITH_QP + module procedure stdlib_qsytrs2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine ssytrs2 +#else + module procedure stdlib_ssytrs2 +#endif +#:if WITH_QP + module procedure stdlib_wsytrs2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytrs2 +#else + module procedure stdlib_zsytrs2 +#endif + end interface sytrs2 + + + + !> SYTRS_3: solves a system of linear equations A * X = B with a complex + !> symmetric matrix A using the factorization computed + !> by CSYTRF_RK or CSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + interface sytrs_3 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*),e(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine csytrs_3 +#else + module procedure stdlib_csytrs_3 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*),e(*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dsytrs_3 +#else + module procedure stdlib_dsytrs_3 +#endif +#:if WITH_QP + module procedure stdlib_qsytrs_3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*),e(*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine ssytrs_3 +#else + module procedure stdlib_ssytrs_3 +#endif +#:if WITH_QP + module procedure stdlib_wsytrs_3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*),e(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zsytrs_3 +#else + module procedure stdlib_zsytrs_3 +#endif + end interface sytrs_3 + + + + !> SYTRS_AA: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by CSYTRF_AA. + interface sytrs_aa +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine csytrs_aa +#else + module procedure stdlib_csytrs_aa +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dsytrs_aa +#else + module procedure stdlib_dsytrs_aa +#endif +#:if WITH_QP + module procedure stdlib_qsytrs_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine ssytrs_aa +#else + module procedure stdlib_ssytrs_aa +#endif +#:if WITH_QP + module procedure stdlib_wsytrs_aa +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(in) :: n,nrhs,lda,ldb,lwork,ipiv(*) + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine zsytrs_aa +#else + module procedure stdlib_zsytrs_aa +#endif + end interface sytrs_aa + + + + !> SYTRS_ROOK: solves a system of linear equations A*X = B with + !> a complex symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF_ROOK. + interface sytrs_rook +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine csytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine csytrs_rook +#else + module procedure stdlib_csytrs_rook +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dsytrs_rook +#else + module procedure stdlib_dsytrs_rook +#endif +#:if WITH_QP + module procedure stdlib_qsytrs_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ssytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine ssytrs_rook +#else + module procedure stdlib_ssytrs_rook +#endif +#:if WITH_QP + module procedure stdlib_wsytrs_rook +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs,ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine zsytrs_rook +#else + module procedure stdlib_zsytrs_rook +#endif + end interface sytrs_rook + + + + !> TBCON: estimates the reciprocal of the condition number of a + !> triangular band matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + interface tbcon +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ctbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + end subroutine ctbcon +#else + module procedure stdlib_ctbcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: rcond,work(*) + real(dp), intent(in) :: ab(ldab,*) + end subroutine dtbcon +#else + module procedure stdlib_dtbcon +#endif +#:if WITH_QP + module procedure stdlib_qtbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine stbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,iwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,n + real(sp), intent(out) :: rcond,work(*) + real(sp), intent(in) :: ab(ldab,*) + end subroutine stbcon +#else + module procedure stdlib_stbcon +#endif +#:if WITH_QP + module procedure stdlib_wtbcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ztbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,rwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,n + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + end subroutine ztbcon +#else + module procedure stdlib_ztbcon +#endif + end interface tbcon + + + + !> TBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by CTBTRS or some other + !> means before entering this routine. TBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + interface tbrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: ab(ldab,*),b(ldb,*),x(ldx,*) + complex(sp), intent(out) :: work(*) + end subroutine ctbrfs +#else + module procedure stdlib_ctbrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldb,ldx,n,nrhs + real(dp), intent(in) :: ab(ldab,*),b(ldb,*),x(ldx,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine dtbrfs +#else + module procedure stdlib_dtbrfs +#endif +#:if WITH_QP + module procedure stdlib_qtbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, & + ferr, berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: kd,ldab,ldb,ldx,n,nrhs + real(sp), intent(in) :: ab(ldab,*),b(ldb,*),x(ldx,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine stbrfs +#else + module procedure stdlib_stbrfs +#endif +#:if WITH_QP + module procedure stdlib_wtbrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, & + ferr, berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: ab(ldab,*),b(ldb,*),x(ldx,*) + complex(dp), intent(out) :: work(*) + end subroutine ztbrfs +#else + module procedure stdlib_ztbrfs +#endif + end interface tbrfs + + + + !> TBTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by-NRHS matrix. A check is made to verify that A is nonsingular. + interface tbtrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine ctbtrs +#else + module procedure stdlib_ctbtrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dtbtrs +#else + module procedure stdlib_dtbtrs +#endif +#:if WITH_QP + module procedure stdlib_qtbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine stbtrs +#else + module procedure stdlib_stbtrs +#endif +#:if WITH_QP + module procedure stdlib_wtbtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd,ldab,ldb,n,nrhs + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine ztbtrs +#else + module procedure stdlib_ztbtrs +#endif + end interface tbtrs + + + + !> Level 3 BLAS like routine for A in RFP Format. + !> TFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**H. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + interface tfsm +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,diag,side,trans,uplo + integer(ilp), intent(in) :: ldb,m,n + complex(sp), intent(in) :: alpha,a(0:*) + complex(sp), intent(inout) :: b(0:ldb-1,0:*) + end subroutine ctfsm +#else + module procedure stdlib_ctfsm +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,diag,side,trans,uplo + integer(ilp), intent(in) :: ldb,m,n + real(dp), intent(in) :: alpha,a(0:*) + real(dp), intent(inout) :: b(0:ldb-1,0:*) + end subroutine dtfsm +#else + module procedure stdlib_dtfsm +#endif +#:if WITH_QP + module procedure stdlib_qtfsm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,diag,side,trans,uplo + integer(ilp), intent(in) :: ldb,m,n + real(sp), intent(in) :: alpha,a(0:*) + real(sp), intent(inout) :: b(0:ldb-1,0:*) + end subroutine stfsm +#else + module procedure stdlib_stfsm +#endif +#:if WITH_QP + module procedure stdlib_wtfsm +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,diag,side,trans,uplo + integer(ilp), intent(in) :: ldb,m,n + complex(dp), intent(in) :: alpha,a(0:*) + complex(dp), intent(inout) :: b(0:ldb-1,0:*) + end subroutine ztfsm +#else + module procedure stdlib_ztfsm +#endif + end interface tfsm + + + + !> TFTRI: computes the inverse of a triangular matrix A stored in RFP + !> format. + !> This is a Level 3 BLAS version of the algorithm. + interface tftri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctftri( transr, uplo, diag, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo,diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: a(0:*) + end subroutine ctftri +#else + module procedure stdlib_ctftri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtftri( transr, uplo, diag, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo,diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: a(0:*) + end subroutine dtftri +#else + module procedure stdlib_dtftri +#endif +#:if WITH_QP + module procedure stdlib_qtftri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stftri( transr, uplo, diag, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo,diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: a(0:*) + end subroutine stftri +#else + module procedure stdlib_stftri +#endif +#:if WITH_QP + module procedure stdlib_wtftri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztftri( transr, uplo, diag, n, a, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo,diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: a(0:*) + end subroutine ztftri +#else + module procedure stdlib_ztftri +#endif + end interface tftri + + + + !> TFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + interface tfttp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctfttp( transr, uplo, n, arf, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(out) :: ap(0:*) + complex(sp), intent(in) :: arf(0:*) + end subroutine ctfttp +#else + module procedure stdlib_ctfttp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtfttp( transr, uplo, n, arf, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: ap(0:*) + real(dp), intent(in) :: arf(0:*) + end subroutine dtfttp +#else + module procedure stdlib_dtfttp +#endif +#:if WITH_QP + module procedure stdlib_qtfttp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stfttp( transr, uplo, n, arf, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: ap(0:*) + real(sp), intent(in) :: arf(0:*) + end subroutine stfttp +#else + module procedure stdlib_stfttp +#endif +#:if WITH_QP + module procedure stdlib_wtfttp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztfttp( transr, uplo, n, arf, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(out) :: ap(0:*) + complex(dp), intent(in) :: arf(0:*) + end subroutine ztfttp +#else + module procedure stdlib_ztfttp +#endif + end interface tfttp + + + + !> TFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + interface tfttr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctfttr( transr, uplo, n, arf, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(sp), intent(out) :: a(0:lda-1,0:*) + complex(sp), intent(in) :: arf(0:*) + end subroutine ctfttr +#else + module procedure stdlib_ctfttr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtfttr( transr, uplo, n, arf, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(dp), intent(out) :: a(0:lda-1,0:*) + real(dp), intent(in) :: arf(0:*) + end subroutine dtfttr +#else + module procedure stdlib_dtfttr +#endif +#:if WITH_QP + module procedure stdlib_qtfttr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stfttr( transr, uplo, n, arf, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(sp), intent(out) :: a(0:lda-1,0:*) + real(sp), intent(in) :: arf(0:*) + end subroutine stfttr +#else + module procedure stdlib_stfttr +#endif +#:if WITH_QP + module procedure stdlib_wtfttr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztfttr( transr, uplo, n, arf, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(dp), intent(out) :: a(0:lda-1,0:*) + complex(dp), intent(in) :: arf(0:*) + end subroutine ztfttr +#else + module procedure stdlib_ztfttr +#endif + end interface tfttr + + + + !> TGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of complex matrices (S,P), where S and P are upper triangular. + !> Matrix pairs of this type are produced by the generalized Schur + !> factorization of a complex matrix pair (A,B): + !> A = Q*S*Z**H, B = Q*P*Z**H + !> as computed by CGGHRD + CHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal elements of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the unitary factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + interface tgevc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr,& + mm, m, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldp,lds,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: p(ldp,*),s(lds,*) + complex(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + complex(sp), intent(out) :: work(*) + end subroutine ctgevc +#else + module procedure stdlib_ctgevc +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr,& + mm, m, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldp,lds,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(in) :: p(ldp,*),s(lds,*) + real(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(dp), intent(out) :: work(*) + end subroutine dtgevc +#else + module procedure stdlib_dtgevc +#endif +#:if WITH_QP + module procedure stdlib_qtgevc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr,& + mm, m, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldp,lds,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(in) :: p(ldp,*),s(lds,*) + real(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(sp), intent(out) :: work(*) + end subroutine stgevc +#else + module procedure stdlib_stgevc +#endif +#:if WITH_QP + module procedure stdlib_wtgevc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr,& + mm, m, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldp,lds,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: p(ldp,*),s(lds,*) + complex(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + complex(dp), intent(out) :: work(*) + end subroutine ztgevc +#else + module procedure stdlib_ztgevc +#endif + end interface tgevc + + + + !> TGEXC: reorders the generalized Schur decomposition of a complex + !> matrix pair (A,B), using an unitary equivalence transformation + !> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with + !> row index IFST is moved to row ILST. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + interface tgexc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz + integer(ilp), intent(in) :: ifst,lda,ldb,ldq,ldz,n + integer(ilp), intent(inout) :: ilst + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine ctgexc +#else + module procedure stdlib_ctgexc +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst,& + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz + integer(ilp), intent(inout) :: ifst,ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldq,ldz,lwork,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + real(dp), intent(out) :: work(*) + end subroutine dtgexc +#else + module procedure stdlib_dtgexc +#endif +#:if WITH_QP + module procedure stdlib_qtgexc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst,& + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz + integer(ilp), intent(inout) :: ifst,ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldq,ldz,lwork,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + real(sp), intent(out) :: work(*) + end subroutine stgexc +#else + module procedure stdlib_stgexc +#endif +#:if WITH_QP + module procedure stdlib_wtgexc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz + integer(ilp), intent(in) :: ifst,lda,ldb,ldq,ldz,n + integer(ilp), intent(inout) :: ilst + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine ztgexc +#else + module procedure stdlib_ztgexc +#endif + end interface tgexc + + + + !> TGSEN: reorders the generalized Schur decomposition of a complex + !> matrix pair (A, B) (in terms of an unitary equivalence trans- + !> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the pair (A,B). The leading + !> columns of Q and Z form unitary bases of the corresponding left and + !> right eigenspaces (deflating subspaces). (A, B) must be in + !> generalized Schur canonical form, that is, A and B are both upper + !> triangular. + !> TGSEN also computes the generalized eigenvalues + !> w(j)= ALPHA(j) / BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, the routine computes estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + interface tgsen +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, & + q, ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz,select(*) + integer(ilp), intent(in) :: ijob,lda,ldb,ldq,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,iwork(*) + real(sp), intent(out) :: pl,pr,dif(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + complex(sp), intent(out) :: alpha(*),beta(*),work(*) + end subroutine ctgsen +#else + module procedure stdlib_ctgsen +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, & + alphai, beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz,select(*) + integer(ilp), intent(in) :: ijob,lda,ldb,ldq,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,iwork(*) + real(dp), intent(out) :: pl,pr,alphai(*),alphar(*),beta(*),dif(*),work(*) + + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine dtgsen +#else + module procedure stdlib_dtgsen +#endif +#:if WITH_QP + module procedure stdlib_qtgsen +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, & + alphai, beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz,select(*) + integer(ilp), intent(in) :: ijob,lda,ldb,ldq,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,iwork(*) + real(sp), intent(out) :: pl,pr,alphai(*),alphar(*),beta(*),dif(*),work(*) + + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + end subroutine stgsen +#else + module procedure stdlib_stgsen +#endif +#:if WITH_QP + module procedure stdlib_wtgsen +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, & + q, ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + logical(lk), intent(in) :: wantq,wantz,select(*) + integer(ilp), intent(in) :: ijob,lda,ldb,ldq,ldz,liwork,lwork,n + integer(ilp), intent(out) :: info,m,iwork(*) + real(dp), intent(out) :: pl,pr,dif(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),z(ldz,*) + complex(dp), intent(out) :: alpha(*),beta(*),work(*) + end subroutine ztgsen +#else + module procedure stdlib_ztgsen +#endif + end interface tgsen + + + + !> TGSJA: computes the generalized singular value decomposition (GSVD) + !> of two complex upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine CGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), + !> where U, V and Q are unitary matrices. + !> R is a nonsingular upper triangular matrix, and D1 + !> and D2 are ``diagonal'' matrices, which are of the following + !> structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the unitary transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + interface tgsja +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb,& + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobq,jobu,jobv + integer(ilp), intent(out) :: info,ncycle + integer(ilp), intent(in) :: k,l,lda,ldb,ldq,ldu,ldv,m,n,p + real(sp), intent(in) :: tola,tolb + real(sp), intent(out) :: alpha(*),beta(*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),u(ldu,*),v(ldv,*) + + complex(sp), intent(out) :: work(*) + end subroutine ctgsja +#else + module procedure stdlib_ctgsja +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb,& + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobq,jobu,jobv + integer(ilp), intent(out) :: info,ncycle + integer(ilp), intent(in) :: k,l,lda,ldb,ldq,ldu,ldv,m,n,p + real(dp), intent(in) :: tola,tolb + real(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),u(ldu,*),v(ldv,*) + + real(dp), intent(out) :: alpha(*),beta(*),work(*) + end subroutine dtgsja +#else + module procedure stdlib_dtgsja +#endif +#:if WITH_QP + module procedure stdlib_qtgsja +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb,& + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobq,jobu,jobv + integer(ilp), intent(out) :: info,ncycle + integer(ilp), intent(in) :: k,l,lda,ldb,ldq,ldu,ldv,m,n,p + real(sp), intent(in) :: tola,tolb + real(sp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),u(ldu,*),v(ldv,*) + + real(sp), intent(out) :: alpha(*),beta(*),work(*) + end subroutine stgsja +#else + module procedure stdlib_stgsja +#endif +#:if WITH_QP + module procedure stdlib_wtgsja +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb,& + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobq,jobu,jobv + integer(ilp), intent(out) :: info,ncycle + integer(ilp), intent(in) :: k,l,lda,ldb,ldq,ldu,ldv,m,n,p + real(dp), intent(in) :: tola,tolb + real(dp), intent(out) :: alpha(*),beta(*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*),q(ldq,*),u(ldu,*),v(ldv,*) + + complex(dp), intent(out) :: work(*) + end subroutine ztgsja +#else + module procedure stdlib_ztgsja +#endif + end interface tgsja + + + + !> TGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B). + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + interface tgsna +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + s, dif, mm, m, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: dif(*),s(*) + complex(sp), intent(in) :: a(lda,*),b(ldb,*),vl(ldvl,*),vr(ldvr,*) + complex(sp), intent(out) :: work(*) + end subroutine ctgsna +#else + module procedure stdlib_ctgsna +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + s, dif, mm, m, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(in) :: a(lda,*),b(ldb,*),vl(ldvl,*),vr(ldvr,*) + real(dp), intent(out) :: dif(*),s(*),work(*) + end subroutine dtgsna +#else + module procedure stdlib_dtgsna +#endif +#:if WITH_QP + module procedure stdlib_qtgsna +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + s, dif, mm, m, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(in) :: a(lda,*),b(ldb,*),vl(ldvl,*),vr(ldvr,*) + real(sp), intent(out) :: dif(*),s(*),work(*) + end subroutine stgsna +#else + module procedure stdlib_stgsna +#endif +#:if WITH_QP + module procedure stdlib_wtgsna +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + s, dif, mm, m, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldvl,ldvr,lwork,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: dif(*),s(*) + complex(dp), intent(in) :: a(lda,*),b(ldb,*),vl(ldvl,*),vr(ldvr,*) + complex(dp), intent(out) :: work(*) + end subroutine ztgsna +#else + module procedure stdlib_ztgsna +#endif + end interface tgsna + + + + !> TGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with complex entries. A, B, D and E are upper + !> triangular (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 + !> is an output scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z + !> is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Here Ix is the identity matrix of size x and X**H is the conjugate + !> transpose of X. Kron(X, Y) is the Kronecker product between the + !> matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case (TRANS = 'C') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using CLACON. + !> If IJOB >= 1, TGSYL computes a Frobenius norm-based estimate of + !> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. + !> This is a level-3 BLAS algorithm. + interface tgsyl +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, & + f, ldf, scale, dif, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob,lda,ldb,ldc,ldd,lde,ldf,lwork,m,n + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(out) :: dif,scale + complex(sp), intent(in) :: a(lda,*),b(ldb,*),d(ldd,*),e(lde,*) + complex(sp), intent(inout) :: c(ldc,*),f(ldf,*) + complex(sp), intent(out) :: work(*) + end subroutine ctgsyl +#else + module procedure stdlib_ctgsyl +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, & + f, ldf, scale, dif, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob,lda,ldb,ldc,ldd,lde,ldf,lwork,m,n + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(out) :: dif,scale,work(*) + real(dp), intent(in) :: a(lda,*),b(ldb,*),d(ldd,*),e(lde,*) + real(dp), intent(inout) :: c(ldc,*),f(ldf,*) + end subroutine dtgsyl +#else + module procedure stdlib_dtgsyl +#endif +#:if WITH_QP + module procedure stdlib_qtgsyl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, & + f, ldf, scale, dif, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob,lda,ldb,ldc,ldd,lde,ldf,lwork,m,n + integer(ilp), intent(out) :: info,iwork(*) + real(sp), intent(out) :: dif,scale,work(*) + real(sp), intent(in) :: a(lda,*),b(ldb,*),d(ldd,*),e(lde,*) + real(sp), intent(inout) :: c(ldc,*),f(ldf,*) + end subroutine stgsyl +#else + module procedure stdlib_stgsyl +#endif +#:if WITH_QP + module procedure stdlib_wtgsyl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, & + f, ldf, scale, dif, work, lwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob,lda,ldb,ldc,ldd,lde,ldf,lwork,m,n + integer(ilp), intent(out) :: info,iwork(*) + real(dp), intent(out) :: dif,scale + complex(dp), intent(in) :: a(lda,*),b(ldb,*),d(ldd,*),e(lde,*) + complex(dp), intent(inout) :: c(ldc,*),f(ldf,*) + complex(dp), intent(out) :: work(*) + end subroutine ztgsyl +#else + module procedure stdlib_ztgsyl +#endif + end interface tgsyl + + + + !> TPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + interface tpcon +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ctpcon( norm, uplo, diag, n, ap, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + end subroutine ctpcon +#else + module procedure stdlib_ctpcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtpcon( norm, uplo, diag, n, ap, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n + real(dp), intent(out) :: rcond,work(*) + real(dp), intent(in) :: ap(*) + end subroutine dtpcon +#else + module procedure stdlib_dtpcon +#endif +#:if WITH_QP + module procedure stdlib_qtpcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine stpcon( norm, uplo, diag, n, ap, rcond, work, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: n + real(sp), intent(out) :: rcond,work(*) + real(sp), intent(in) :: ap(*) + end subroutine stpcon +#else + module procedure stdlib_stpcon +#endif +#:if WITH_QP + module procedure stdlib_wtpcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ztpcon( norm, uplo, diag, n, ap, rcond, work, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + end subroutine ztpcon +#else + module procedure stdlib_ztpcon +#endif + end interface tpcon + + + + !> TPLQT: computes a blocked LQ factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + interface tplqt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,mb + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: t(ldt,*),work(*) + end subroutine ctplqt +#else + module procedure stdlib_ctplqt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,mb + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: t(ldt,*),work(*) + end subroutine dtplqt +#else + module procedure stdlib_dtplqt +#endif +#:if WITH_QP + module procedure stdlib_qtplqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,mb + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: t(ldt,*),work(*) + end subroutine stplqt +#else + module procedure stdlib_stplqt +#endif +#:if WITH_QP + module procedure stdlib_wtplqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,mb + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: t(ldt,*),work(*) + end subroutine ztplqt +#else + module procedure stdlib_ztplqt +#endif + end interface tplqt + + + + !> TPLQT2: computes a LQ a factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + interface tplqt2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: t(ldt,*) + end subroutine ctplqt2 +#else + module procedure stdlib_ctplqt2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: t(ldt,*) + end subroutine dtplqt2 +#else + module procedure stdlib_dtplqt2 +#endif +#:if WITH_QP + module procedure stdlib_qtplqt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: t(ldt,*) + end subroutine stplqt2 +#else + module procedure stdlib_stplqt2 +#endif +#:if WITH_QP + module procedure stdlib_wtplqt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: t(ldt,*) + end subroutine ztplqt2 +#else + module procedure stdlib_ztplqt2 +#endif + end interface tplqt2 + + + + !> TPMLQT: applies a complex unitary matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + interface tpmlqt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,mb,ldt + complex(sp), intent(in) :: v(ldv,*),t(ldt,*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine ctpmlqt +#else + module procedure stdlib_ctpmlqt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,mb,ldt + real(dp), intent(in) :: v(ldv,*),t(ldt,*) + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dtpmlqt +#else + module procedure stdlib_dtpmlqt +#endif +#:if WITH_QP + module procedure stdlib_qtpmlqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,mb,ldt + real(sp), intent(in) :: v(ldv,*),t(ldt,*) + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine stpmlqt +#else + module procedure stdlib_stpmlqt +#endif +#:if WITH_QP + module procedure stdlib_wtpmlqt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,mb,ldt + complex(dp), intent(in) :: v(ldv,*),t(ldt,*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine ztpmlqt +#else + module procedure stdlib_ztpmlqt +#endif + end interface tpmlqt + + + + !> TPMQRT: applies a complex orthogonal matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + interface tpmqrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,nb,ldt + complex(sp), intent(in) :: v(ldv,*),t(ldt,*) + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: work(*) + end subroutine ctpmqrt +#else + module procedure stdlib_ctpmqrt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,nb,ldt + real(dp), intent(in) :: v(ldv,*),t(ldt,*) + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: work(*) + end subroutine dtpmqrt +#else + module procedure stdlib_dtpmqrt +#endif +#:if WITH_QP + module procedure stdlib_qtpmqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,nb,ldt + real(sp), intent(in) :: v(ldv,*),t(ldt,*) + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: work(*) + end subroutine stpmqrt +#else + module procedure stdlib_stpmqrt +#endif +#:if WITH_QP + module procedure stdlib_wtpmqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, & + ldb, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,ldv,lda,ldb,m,n,l,nb,ldt + complex(dp), intent(in) :: v(ldv,*),t(ldt,*) + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: work(*) + end subroutine ztpmqrt +#else + module procedure stdlib_ztpmqrt +#endif + end interface tpmqrt + + + + !> TPQRT: computes a blocked QR factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + interface tpqrt +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,nb + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: t(ldt,*),work(*) + end subroutine ctpqrt +#else + module procedure stdlib_ctpqrt +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,nb + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: t(ldt,*),work(*) + end subroutine dtpqrt +#else + module procedure stdlib_dtpqrt +#endif +#:if WITH_QP + module procedure stdlib_qtpqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,nb + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: t(ldt,*),work(*) + end subroutine stpqrt +#else + module procedure stdlib_stpqrt +#endif +#:if WITH_QP + module procedure stdlib_wtpqrt +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l,nb + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: t(ldt,*),work(*) + end subroutine ztpqrt +#else + module procedure stdlib_ztpqrt +#endif + end interface tpqrt + + + + !> TPQRT2: computes a QR factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + interface tpqrt2 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(out) :: t(ldt,*) + end subroutine ctpqrt2 +#else + module procedure stdlib_ctpqrt2 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(out) :: t(ldt,*) + end subroutine dtpqrt2 +#else + module procedure stdlib_dtpqrt2 +#endif +#:if WITH_QP + module procedure stdlib_qtpqrt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(out) :: t(ldt,*) + end subroutine stpqrt2 +#else + module procedure stdlib_stpqrt2 +#endif +#:if WITH_QP + module procedure stdlib_wtpqrt2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldt,n,m,l + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(out) :: t(ldt,*) + end subroutine ztpqrt2 +#else + module procedure stdlib_ztpqrt2 +#endif + end interface tpqrt2 + + + + !> TPRFB: applies a complex "triangular-pentagonal" block reflector H or its + !> conjugate transpose H**H to a complex matrix C, which is composed of two + !> blocks A and B, either from the left or right. + interface tprfb +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,lda,ldb,ldt,ldv,ldwork,m,n + complex(sp), intent(inout) :: a(lda,*),b(ldb,*) + complex(sp), intent(in) :: t(ldt,*),v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + end subroutine ctprfb +#else + module procedure stdlib_ctprfb +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,lda,ldb,ldt,ldv,ldwork,m,n + real(dp), intent(inout) :: a(lda,*),b(ldb,*) + real(dp), intent(in) :: t(ldt,*),v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + end subroutine dtprfb +#else + module procedure stdlib_dtprfb +#endif +#:if WITH_QP + module procedure stdlib_qtprfb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,lda,ldb,ldt,ldv,ldwork,m,n + real(sp), intent(inout) :: a(lda,*),b(ldb,*) + real(sp), intent(in) :: t(ldt,*),v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + end subroutine stprfb +#else + module procedure stdlib_stprfb +#endif +#:if WITH_QP + module procedure stdlib_wtprfb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: direct,side,storev,trans + integer(ilp), intent(in) :: k,l,lda,ldb,ldt,ldv,ldwork,m,n + complex(dp), intent(inout) :: a(lda,*),b(ldb,*) + complex(dp), intent(in) :: t(ldt,*),v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + end subroutine ztprfb +#else + module procedure stdlib_ztprfb +#endif + end interface tprfb + + + + !> TPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by CTPTRS or some other + !> means before entering this routine. TPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + interface tprfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: ap(*),b(ldb,*),x(ldx,*) + complex(sp), intent(out) :: work(*) + end subroutine ctprfs +#else + module procedure stdlib_ctprfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(in) :: ap(*),b(ldb,*),x(ldx,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine dtprfs +#else + module procedure stdlib_dtprfs +#endif +#:if WITH_QP + module procedure stdlib_qtprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(sp), intent(in) :: ap(*),b(ldb,*),x(ldx,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine stprfs +#else + module procedure stdlib_stprfs +#endif +#:if WITH_QP + module procedure stdlib_wtprfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: ap(*),b(ldb,*),x(ldx,*) + complex(dp), intent(out) :: work(*) + end subroutine ztprfs +#else + module procedure stdlib_ztprfs +#endif + end interface tprfs + + + + !> TPTRI: computes the inverse of a complex upper or lower triangular + !> matrix A stored in packed format. + interface tptri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctptri( uplo, diag, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(inout) :: ap(*) + end subroutine ctptri +#else + module procedure stdlib_ctptri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtptri( uplo, diag, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: ap(*) + end subroutine dtptri +#else + module procedure stdlib_dtptri +#endif +#:if WITH_QP + module procedure stdlib_qtptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stptri( uplo, diag, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: ap(*) + end subroutine stptri +#else + module procedure stdlib_stptri +#endif +#:if WITH_QP + module procedure stdlib_wtptri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztptri( uplo, diag, n, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(inout) :: ap(*) + end subroutine ztptri +#else + module procedure stdlib_ztptri +#endif + end interface tptri + + + + !> TPTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + interface tptrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine ctptrs +#else + module procedure stdlib_ctptrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dtptrs +#else + module procedure stdlib_dtptrs +#endif +#:if WITH_QP + module procedure stdlib_qtptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine stptrs +#else + module procedure stdlib_stptrs +#endif +#:if WITH_QP + module procedure stdlib_wtptrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb,n,nrhs + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine ztptrs +#else + module procedure stdlib_ztptrs +#endif + end interface tptrs + + + + !> TPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + interface tpttf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpttf( transr, uplo, n, ap, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(sp), intent(in) :: ap(0:*) + complex(sp), intent(out) :: arf(0:*) + end subroutine ctpttf +#else + module procedure stdlib_ctpttf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpttf( transr, uplo, n, ap, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: ap(0:*) + real(dp), intent(out) :: arf(0:*) + end subroutine dtpttf +#else + module procedure stdlib_dtpttf +#endif +#:if WITH_QP + module procedure stdlib_qtpttf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpttf( transr, uplo, n, ap, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: ap(0:*) + real(sp), intent(out) :: arf(0:*) + end subroutine stpttf +#else + module procedure stdlib_stpttf +#endif +#:if WITH_QP + module procedure stdlib_wtpttf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpttf( transr, uplo, n, ap, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + complex(dp), intent(in) :: ap(0:*) + complex(dp), intent(out) :: arf(0:*) + end subroutine ztpttf +#else + module procedure stdlib_ztpttf +#endif + end interface tpttf + + + + !> TPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + interface tpttr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctpttr( uplo, n, ap, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(sp), intent(out) :: a(lda,*) + complex(sp), intent(in) :: ap(*) + end subroutine ctpttr +#else + module procedure stdlib_ctpttr +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtpttr( uplo, n, ap, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(dp), intent(out) :: a(lda,*) + real(dp), intent(in) :: ap(*) + end subroutine dtpttr +#else + module procedure stdlib_dtpttr +#endif +#:if WITH_QP + module procedure stdlib_qtpttr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stpttr( uplo, n, ap, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(sp), intent(out) :: a(lda,*) + real(sp), intent(in) :: ap(*) + end subroutine stpttr +#else + module procedure stdlib_stpttr +#endif +#:if WITH_QP + module procedure stdlib_wtpttr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztpttr( uplo, n, ap, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(dp), intent(out) :: a(lda,*) + complex(dp), intent(in) :: ap(*) + end subroutine ztpttr +#else + module procedure stdlib_ztpttr +#endif + end interface tpttr + + + + !> TRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + interface trcon +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ctrcon( norm, uplo, diag, n, a, lda, rcond, work,rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: rcond,rwork(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + end subroutine ctrcon +#else + module procedure stdlib_ctrcon +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtrcon( norm, uplo, diag, n, a, lda, rcond, work,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: rcond,work(*) + real(dp), intent(in) :: a(lda,*) + end subroutine dtrcon +#else + module procedure stdlib_dtrcon +#endif +#:if WITH_QP + module procedure stdlib_qtrcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine strcon( norm, uplo, diag, n, a, lda, rcond, work,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,n + real(sp), intent(out) :: rcond,work(*) + real(sp), intent(in) :: a(lda,*) + end subroutine strcon +#else + module procedure stdlib_strcon +#endif +#:if WITH_QP + module procedure stdlib_wtrcon +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ztrcon( norm, uplo, diag, n, a, lda, rcond, work,rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,norm,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(out) :: rcond,rwork(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + end subroutine ztrcon +#else + module procedure stdlib_ztrcon +#endif + end interface trcon + + + + !> TREVC: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + interface trevc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(sp), intent(out) :: work(*) + end subroutine ctrevc +#else + module procedure stdlib_ctrevc +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,mm,n + logical(lk), intent(inout) :: select(*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(dp), intent(out) :: work(*) + end subroutine dtrevc +#else + module procedure stdlib_dtrevc +#endif +#:if WITH_QP + module procedure stdlib_qtrevc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,mm,n + logical(lk), intent(inout) :: select(*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(sp), intent(out) :: work(*) + end subroutine strevc +#else + module procedure stdlib_strevc +#endif +#:if WITH_QP + module procedure stdlib_wtrevc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(dp), intent(out) :: work(*) + end subroutine ztrevc +#else + module procedure stdlib_ztrevc +#endif + end interface trevc + + + + !> TREVC3: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + interface trevc3 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrevc3( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m,& + work, lwork, rwork, lrwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,lwork,lrwork,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(sp), intent(out) :: work(*) + end subroutine ctrevc3 +#else + module procedure stdlib_ctrevc3 +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrevc3( side, howmny, select, n, t, ldt, vl, ldvl,vr, ldvr, mm, m,& + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,lwork,mm,n + logical(lk), intent(inout) :: select(*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(dp), intent(out) :: work(*) + end subroutine dtrevc3 +#else + module procedure stdlib_dtrevc3 +#endif +#:if WITH_QP + module procedure stdlib_qtrevc3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strevc3( side, howmny, select, n, t, ldt, vl, ldvl,vr, ldvr, mm, m,& + work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,lwork,mm,n + logical(lk), intent(inout) :: select(*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(inout) :: vl(ldvl,*),vr(ldvr,*) + real(sp), intent(out) :: work(*) + end subroutine strevc3 +#else + module procedure stdlib_strevc3 +#endif +#:if WITH_QP + module procedure stdlib_wtrevc3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrevc3( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m,& + work, lwork, rwork, lrwork, info) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,side + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,lwork,lrwork,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(dp), intent(out) :: work(*) + end subroutine ztrevc3 +#else + module procedure stdlib_ztrevc3 +#endif + end interface trevc3 + + + + !> TREXC: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that the diagonal element of T with row index IFST + !> is moved to row ILST. + !> The Schur form T is reordered by a unitary similarity transformation + !> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + !> postmultplying it with Z. + interface trexc +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrexc( compq, n, t, ldt, q, ldq, ifst, ilst, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq + integer(ilp), intent(in) :: ifst,ilst,ldq,ldt,n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: q(ldq,*),t(ldt,*) + end subroutine ctrexc +#else + module procedure stdlib_ctrexc +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtrexc( compq, n, t, ldt, q, ldq, ifst, ilst, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq + integer(ilp), intent(inout) :: ifst,ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,ldt,n + real(dp), intent(inout) :: q(ldq,*),t(ldt,*) + real(dp), intent(out) :: work(*) + end subroutine dtrexc +#else + module procedure stdlib_dtrexc +#endif +#:if WITH_QP + module procedure stdlib_qtrexc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine strexc( compq, n, t, ldt, q, ldq, ifst, ilst, work,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq + integer(ilp), intent(inout) :: ifst,ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,ldt,n + real(sp), intent(inout) :: q(ldq,*),t(ldt,*) + real(sp), intent(out) :: work(*) + end subroutine strexc +#else + module procedure stdlib_strexc +#endif +#:if WITH_QP + module procedure stdlib_wtrexc +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrexc( compq, n, t, ldt, q, ldq, ifst, ilst, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq + integer(ilp), intent(in) :: ifst,ilst,ldq,ldt,n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: q(ldq,*),t(ldt,*) + end subroutine ztrexc +#else + module procedure stdlib_ztrexc +#endif + end interface trexc + + + + !> TRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by CTRTRS or some other + !> means before entering this routine. TRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + interface trrfs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, & + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldx,n,nrhs + real(sp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(sp), intent(in) :: a(lda,*),b(ldb,*),x(ldx,*) + complex(sp), intent(out) :: work(*) + end subroutine ctrrfs +#else + module procedure stdlib_ctrrfs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, & + berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldx,n,nrhs + real(dp), intent(in) :: a(lda,*),b(ldb,*),x(ldx,*) + real(dp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine dtrrfs +#else + module procedure stdlib_dtrrfs +#endif +#:if WITH_QP + module procedure stdlib_qtrrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, & + berr, work, iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: lda,ldb,ldx,n,nrhs + real(sp), intent(in) :: a(lda,*),b(ldb,*),x(ldx,*) + real(sp), intent(out) :: berr(*),ferr(*),work(*) + end subroutine strrfs +#else + module procedure stdlib_strrfs +#endif +#:if WITH_QP + module procedure stdlib_wtrrfs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, & + berr, work, rwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,ldx,n,nrhs + real(dp), intent(out) :: berr(*),ferr(*),rwork(*) + complex(dp), intent(in) :: a(lda,*),b(ldb,*),x(ldx,*) + complex(dp), intent(out) :: work(*) + end subroutine ztrrfs +#else + module procedure stdlib_ztrrfs +#endif + end interface trrfs + + + + !> TRSEN: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in + !> the leading positions on the diagonal of the upper triangular matrix + !> T, and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + interface trsen +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ctrsen( job, compq, select, n, t, ldt, q, ldq, w, m, s,sep, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,job + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldq,ldt,lwork,n + real(sp), intent(out) :: s,sep + logical(lk), intent(in) :: select(*) + complex(sp), intent(inout) :: q(ldq,*),t(ldt,*) + complex(sp), intent(out) :: w(*),work(*) + end subroutine ctrsen +#else + module procedure stdlib_ctrsen +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtrsen( job, compq, select, n, t, ldt, q, ldq, wr, wi,m, s, sep, work, & + lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: ldq,ldt,liwork,lwork,n + real(dp), intent(out) :: s,sep,wi(*),work(*),wr(*) + logical(lk), intent(in) :: select(*) + real(dp), intent(inout) :: q(ldq,*),t(ldt,*) + end subroutine dtrsen +#else + module procedure stdlib_dtrsen +#endif +#:if WITH_QP + module procedure stdlib_qtrsen +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine strsen( job, compq, select, n, t, ldt, q, ldq, wr, wi,m, s, sep, work, & + lwork, iwork, liwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: ldq,ldt,liwork,lwork,n + real(sp), intent(out) :: s,sep,wi(*),work(*),wr(*) + logical(lk), intent(in) :: select(*) + real(sp), intent(inout) :: q(ldq,*),t(ldt,*) + end subroutine strsen +#else + module procedure stdlib_strsen +#endif +#:if WITH_QP + module procedure stdlib_wtrsen +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ztrsen( job, compq, select, n, t, ldt, q, ldq, w, m, s,sep, work, lwork,& + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: compq,job + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldq,ldt,lwork,n + real(dp), intent(out) :: s,sep + logical(lk), intent(in) :: select(*) + complex(dp), intent(inout) :: q(ldq,*),t(ldt,*) + complex(dp), intent(out) :: w(*),work(*) + end subroutine ztrsen +#else + module procedure stdlib_ztrsen +#endif + end interface trsen + + + + !> TRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a complex upper triangular + !> matrix T (or of any matrix Q*T*Q**H with Q unitary). + interface trsna +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, & + mm, m, work, ldwork, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,ldwork,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*),s(*),sep(*) + complex(sp), intent(in) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(sp), intent(out) :: work(ldwork,*) + end subroutine ctrsna +#else + module procedure stdlib_ctrsna +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm, & + m, work, ldwork, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: ldt,ldvl,ldvr,ldwork,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: s(*),sep(*),work(ldwork,*) + real(dp), intent(in) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + end subroutine dtrsna +#else + module procedure stdlib_dtrsna +#endif +#:if WITH_QP + module procedure stdlib_qtrsna +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine strsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm, & + m, work, ldwork, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m,iwork(*) + integer(ilp), intent(in) :: ldt,ldvl,ldvr,ldwork,mm,n + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: s(*),sep(*),work(ldwork,*) + real(sp), intent(in) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + end subroutine strsna +#else + module procedure stdlib_strsna +#endif +#:if WITH_QP + module procedure stdlib_wtrsna +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, & + mm, m, work, ldwork, rwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: howmny,job + integer(ilp), intent(out) :: info,m + integer(ilp), intent(in) :: ldt,ldvl,ldvr,ldwork,mm,n + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*),s(*),sep(*) + complex(dp), intent(in) :: t(ldt,*),vl(ldvl,*),vr(ldvr,*) + complex(dp), intent(out) :: work(ldwork,*) + end subroutine ztrsna +#else + module procedure stdlib_ztrsna +#endif + end interface trsna + + + + !> TRSYL: solves the complex Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**H, and A and B are both upper triangular. A is + !> M-by-M and B is N-by-N; the right hand side C and the solution X are + !> M-by-N; and scale is an output scale factor, set <= 1 to avoid + !> overflow in X. + interface trsyl +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ctrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trana,tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn,lda,ldb,ldc,m,n + real(sp), intent(out) :: scale + complex(sp), intent(in) :: a(lda,*),b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + end subroutine ctrsyl +#else + module procedure stdlib_ctrsyl +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine dtrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trana,tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn,lda,ldb,ldc,m,n + real(dp), intent(out) :: scale + real(dp), intent(in) :: a(lda,*),b(ldb,*) + real(dp), intent(inout) :: c(ldc,*) + end subroutine dtrsyl +#else + module procedure stdlib_dtrsyl +#endif +#:if WITH_QP + module procedure stdlib_qtrsyl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine strsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trana,tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn,lda,ldb,ldc,m,n + real(sp), intent(out) :: scale + real(sp), intent(in) :: a(lda,*),b(ldb,*) + real(sp), intent(inout) :: c(ldc,*) + end subroutine strsyl +#else + module procedure stdlib_strsyl +#endif +#:if WITH_QP + module procedure stdlib_wtrsyl +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine ztrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: trana,tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn,lda,ldb,ldc,m,n + real(dp), intent(out) :: scale + complex(dp), intent(in) :: a(lda,*),b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + end subroutine ztrsyl +#else + module procedure stdlib_ztrsyl +#endif + end interface trsyl + + + + !> TRTRI: computes the inverse of a complex upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + interface trtri +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrtri( uplo, diag, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(sp), intent(inout) :: a(lda,*) + end subroutine ctrtri +#else + module procedure stdlib_ctrtri +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrtri( uplo, diag, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(dp), intent(inout) :: a(lda,*) + end subroutine dtrtri +#else + module procedure stdlib_dtrtri +#endif +#:if WITH_QP + module procedure stdlib_qtrtri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strtri( uplo, diag, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + real(sp), intent(inout) :: a(lda,*) + end subroutine strtri +#else + module procedure stdlib_strtri +#endif +#:if WITH_QP + module procedure stdlib_wtrtri +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrtri( uplo, diag, n, a, lda, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,n + complex(dp), intent(inout) :: a(lda,*) + end subroutine ztrtri +#else + module procedure stdlib_ztrtri +#endif + end interface trtri + + + + !> TRTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + interface trtrs +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + end subroutine ctrtrs +#else + module procedure stdlib_ctrtrs +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + end subroutine dtrtrs +#else + module procedure stdlib_dtrtrs +#endif +#:if WITH_QP + module procedure stdlib_qtrtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + end subroutine strtrs +#else + module procedure stdlib_strtrs +#endif +#:if WITH_QP + module procedure stdlib_wtrtrs +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: diag,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldb,n,nrhs + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + end subroutine ztrtrs +#else + module procedure stdlib_ztrtrs +#endif + end interface trtrs + + + + !> TRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + interface trttf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrttf( transr, uplo, n, a, lda, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(sp), intent(in) :: a(0:lda-1,0:*) + complex(sp), intent(out) :: arf(0:*) + end subroutine ctrttf +#else + module procedure stdlib_ctrttf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrttf( transr, uplo, n, a, lda, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(dp), intent(in) :: a(0:lda-1,0:*) + real(dp), intent(out) :: arf(0:*) + end subroutine dtrttf +#else + module procedure stdlib_dtrttf +#endif +#:if WITH_QP + module procedure stdlib_qtrttf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strttf( transr, uplo, n, a, lda, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(sp), intent(in) :: a(0:lda-1,0:*) + real(sp), intent(out) :: arf(0:*) + end subroutine strttf +#else + module procedure stdlib_strttf +#endif +#:if WITH_QP + module procedure stdlib_wtrttf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrttf( transr, uplo, n, a, lda, arf, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: transr,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(dp), intent(in) :: a(0:lda-1,0:*) + complex(dp), intent(out) :: arf(0:*) + end subroutine ztrttf +#else + module procedure stdlib_ztrttf +#endif + end interface trttf + + + + !> TRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + interface trttp +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctrttp( uplo, n, a, lda, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: ap(*) + end subroutine ctrttp +#else + module procedure stdlib_ctrttp +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtrttp( uplo, n, a, lda, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: ap(*) + end subroutine dtrttp +#else + module procedure stdlib_dtrttp +#endif +#:if WITH_QP + module procedure stdlib_qtrttp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine strttp( uplo, n, a, lda, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: ap(*) + end subroutine strttp +#else + module procedure stdlib_strttp +#endif +#:if WITH_QP + module procedure stdlib_wtrttp +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztrttp( uplo, n, a, lda, ap, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n,lda + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: ap(*) + end subroutine ztrttp +#else + module procedure stdlib_ztrttp +#endif + end interface trttp + + + + !> TZRZF: reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A + !> to upper triangular form by means of unitary transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N unitary matrix and R is an M-by-M upper + !> triangular matrix. + interface tzrzf +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ctzrzf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*),work(*) + end subroutine ctzrzf +#else + module procedure stdlib_ctzrzf +#endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine dtzrzf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*),work(*) + end subroutine dtzrzf +#else + module procedure stdlib_dtzrzf +#endif +#:if WITH_QP + module procedure stdlib_qtzrzf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine stzrzf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*),work(*) + end subroutine stzrzf +#else + module procedure stdlib_stzrzf +#endif +#:if WITH_QP + module procedure stdlib_wtzrzf +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine ztzrzf( m, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*),work(*) + end subroutine ztzrzf +#else + module procedure stdlib_ztzrzf +#endif + end interface tzrzf + + + + !> UNBDB: simultaneously bidiagonalizes the blocks of an M-by-M + !> partitioned unitary matrix X: + !> [ B11 | B12 0 0 ] + !> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H + !> X = [-----------] = [---------] [----------------] [---------] . + !> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] + !> [ 0 | 0 0 I ] + !> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is + !> not the case, then X must be transposed and/or permuted. This can be + !> done in constant time using the TRANS and SIGNS options. See CUNCSD + !> for details.) + !> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- + !> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are + !> represented implicitly by Householder vectors. + !> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface unbdb +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: signs,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11,ldx12,ldx21,ldx22,lwork,m,p,q + real(sp), intent(out) :: phi(*),theta(*) + complex(sp), intent(out) :: taup1(*),taup2(*),tauq1(*),tauq2(*),work(*) + + complex(sp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(& + ldx22,*) + end subroutine cunbdb +#else + module procedure stdlib_cunbdb +#endif +#:if WITH_QP + module procedure stdlib_wunbdb +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: signs,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11,ldx12,ldx21,ldx22,lwork,m,p,q + real(dp), intent(out) :: phi(*),theta(*) + complex(dp), intent(out) :: taup1(*),taup2(*),tauq1(*),tauq2(*),work(*) + + complex(dp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(& + ldx22,*) + end subroutine zunbdb +#else + module procedure stdlib_zunbdb +#endif + end interface unbdb + + + + !> UNBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines CUNBDB2, CUNBDB3, and CUNBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + interface unbdb1 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*) + complex(sp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine cunbdb1 +#else + module procedure stdlib_cunbdb1 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*) + complex(dp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine zunbdb1 +#else + module procedure stdlib_zunbdb1 +#endif + end interface unbdb1 + + + + !> UNBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines CUNBDB1, CUNBDB3, and CUNBDB4 handle cases in + !> which P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + interface unbdb2 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*) + complex(sp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine cunbdb2 +#else + module procedure stdlib_cunbdb2 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb2 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*) + complex(dp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine zunbdb2 +#else + module procedure stdlib_zunbdb2 +#endif + end interface unbdb2 + + + + !> UNBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines CUNBDB1, CUNBDB2, and CUNBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface unbdb3 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*) + complex(sp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine cunbdb3 +#else + module procedure stdlib_cunbdb3 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb3 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*) + complex(dp), intent(out) :: taup1(*),taup2(*),tauq1(*),work(*) + complex(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine zunbdb3 +#else + module procedure stdlib_zunbdb3 +#endif + end interface unbdb3 + + + + !> UNBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines CUNBDB1, CUNBDB2, and CUNBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + interface unbdb4 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, phantom, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(sp), intent(out) :: phi(*),theta(*) + complex(sp), intent(out) :: phantom(*),taup1(*),taup2(*),tauq1(*),work(*) + + complex(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine cunbdb4 +#else + module procedure stdlib_cunbdb4 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb4 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, & + tauq1, phantom, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork,m,p,q,ldx11,ldx21 + real(dp), intent(out) :: phi(*),theta(*) + complex(dp), intent(out) :: phantom(*),taup1(*),taup2(*),tauq1(*),work(*) + + complex(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine zunbdb4 +#else + module procedure stdlib_zunbdb4 +#endif + end interface unbdb4 + + + + !> UNBDB5: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then some other vector from the orthogonal complement + !> is returned. This vector is chosen in an arbitrary but deterministic + !> way. + interface unbdb5 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x1(*),x2(*) + end subroutine cunbdb5 +#else + module procedure stdlib_cunbdb5 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb5 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x1(*),x2(*) + end subroutine zunbdb5 +#else + module procedure stdlib_zunbdb5 +#endif + end interface unbdb5 + + + + !> UNBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + interface unbdb6 +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + complex(sp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x1(*),x2(*) + end subroutine cunbdb6 +#else + module procedure stdlib_cunbdb6 +#endif +#:if WITH_QP + module procedure stdlib_wunbdb6 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: incx1,incx2,ldq1,ldq2,lwork,m1,m2,n + integer(ilp), intent(out) :: info + complex(dp), intent(in) :: q1(ldq1,*),q2(ldq2,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x1(*),x2(*) + end subroutine zunbdb6 +#else + module procedure stdlib_zunbdb6 +#endif + end interface unbdb6 + + + + !> UNCSD: computes the CS decomposition of an M-by-M partitioned + !> unitary matrix X: + !> [ I 0 0 | 0 0 0 ] + !> [ 0 C 0 | 0 -S 0 ] + !> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H + !> X = [-----------] = [---------] [---------------------] [---------] . + !> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] + !> [ 0 S 0 | 0 C 0 ] + !> [ 0 0 I | 0 0 0 ] + !> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, + !> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are + !> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in + !> which R = MIN(P,M-P,Q,M-Q). + interface uncsd +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine cuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, & + x11, ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, & + ldv1t, v2t,ldv2t, work, lwork, rwork, lrwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,signs,trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,ldx11,ldx12,ldx21,ldx22,& + lrwork,lwork,m,p,q + real(sp), intent(out) :: theta(*),rwork(*) + complex(sp), intent(out) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(ldv2t,*),& + work(*) + complex(sp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(& + ldx22,*) + end subroutine cuncsd +#else + module procedure stdlib_cuncsd +#endif +#:if WITH_QP + module procedure stdlib_wuncsd +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + recursive subroutine zuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, & + x11, ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, & + ldv1t, v2t,ldv2t, work, lwork, rwork, lrwork,iwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t,jobv2t,signs,trans + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,ldv2t,ldx11,ldx12,ldx21,ldx22,& + lrwork,lwork,m,p,q + real(dp), intent(out) :: theta(*),rwork(*) + complex(dp), intent(out) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),v2t(ldv2t,*),& + work(*) + complex(dp), intent(inout) :: x11(ldx11,*),x12(ldx12,*),x21(ldx21,*),x22(& + ldx22,*) + end subroutine zuncsd +#else + module procedure stdlib_zuncsd +#endif + end interface uncsd + + + + !> UNCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + interface uncsd2by1 +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine cuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta,& + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,lwork,ldx11,ldx21,m,p,q,& + lrwork + real(sp), intent(out) :: rwork(*),theta(*) + complex(sp), intent(out) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),work(*) + complex(sp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine cuncsd2by1 +#else + module procedure stdlib_cuncsd2by1 +#endif +#:if WITH_QP + module procedure stdlib_wuncsd2by1 +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + subroutine zuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta,& + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: jobu1,jobu2,jobv1t + integer(ilp), intent(out) :: info,iwork(*) + integer(ilp), intent(in) :: ldu1,ldu2,ldv1t,lwork,ldx11,ldx21,m,p,q,& + lrwork + real(dp), intent(out) :: rwork(*),theta(*) + complex(dp), intent(out) :: u1(ldu1,*),u2(ldu2,*),v1t(ldv1t,*),work(*) + complex(dp), intent(inout) :: x11(ldx11,*),x21(ldx21,*) + end subroutine zuncsd2by1 +#else + module procedure stdlib_zuncsd2by1 +#endif + end interface uncsd2by1 + + + + !> UNG2L: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. + interface ung2l +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cung2l( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cung2l +#else + module procedure stdlib_cung2l +#endif +#:if WITH_QP + module procedure stdlib_wung2l +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zung2l( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zung2l +#else + module procedure stdlib_zung2l +#endif + end interface ung2l + + + + !> UNG2R: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. + interface ung2r +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cung2r( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cung2r +#else + module procedure stdlib_cung2r +#endif +#:if WITH_QP + module procedure stdlib_wung2r +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zung2r( m, n, k, a, lda, tau, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zung2r +#else + module procedure stdlib_zung2r +#endif + end interface ung2r + + + + !> UNGBR: generates one of the complex unitary matrices Q or P**H + !> determined by CGEBRD when reducing a complex matrix A to bidiagonal + !> form: A = Q * B * P**H. Q and P**H are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and UNGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and UNGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + !> is of order N: + !> if k < n, P**H = G(k) . . . G(2) G(1) and UNGBR returns the first m + !> rows of P**H, where n >= m >= k; + !> if k >= n, P**H = G(n-1) . . . G(2) G(1) and UNGBR returns P**H as + !> an N-by-N matrix. + interface ungbr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cungbr +#else + module procedure stdlib_cungbr +#endif +#:if WITH_QP + module procedure stdlib_wungbr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zungbr +#else + module procedure stdlib_zungbr +#endif + end interface ungbr + + + + !> UNGHR: generates a complex unitary matrix Q which is defined as the + !> product of IHI-ILO elementary reflectors of order N, as returned by + !> CGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + interface unghr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunghr +#else + module procedure stdlib_cunghr +#endif +#:if WITH_QP + module procedure stdlib_wunghr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(in) :: ihi,ilo,lda,lwork,n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunghr +#else + module procedure stdlib_zunghr +#endif + end interface unghr + + + + !> UNGLQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. + interface unglq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunglq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunglq +#else + module procedure stdlib_cunglq +#endif +#:if WITH_QP + module procedure stdlib_wunglq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunglq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunglq +#else + module procedure stdlib_zunglq +#endif + end interface unglq + + + + !> UNGQL: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. + interface ungql +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungql( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cungql +#else + module procedure stdlib_cungql +#endif +#:if WITH_QP + module procedure stdlib_wungql +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungql( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zungql +#else + module procedure stdlib_zungql +#endif + end interface ungql + + + + !> UNGQR: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. + interface ungqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungqr( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cungqr +#else + module procedure stdlib_cungqr +#endif +#:if WITH_QP + module procedure stdlib_wungqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungqr( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zungqr +#else + module procedure stdlib_zungqr +#endif + end interface ungqr + + + + !> UNGRQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. + interface ungrq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungrq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cungrq +#else + module procedure stdlib_cungrq +#endif +#:if WITH_QP + module procedure stdlib_wungrq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungrq( m, n, k, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,lwork,m,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zungrq +#else + module procedure stdlib_zungrq +#endif + end interface ungrq + + + + !> UNGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> CHETRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + interface ungtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungtr( uplo, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cungtr +#else + module procedure stdlib_cungtr +#endif +#:if WITH_QP + module procedure stdlib_wungtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungtr( uplo, n, a, lda, tau, work, lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,lwork,n + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zungtr +#else + module procedure stdlib_zungtr +#endif + end interface ungtr + + + + !> UNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal + !> columns, which are the first N columns of a product of comlpex unitary + !> matrices of order M which are returned by CLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for CLATSQR. + interface ungtsqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(*) + end subroutine cungtsqr +#else + module procedure stdlib_cungtsqr +#endif +#:if WITH_QP + module procedure stdlib_wungtsqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(*) + end subroutine zungtsqr +#else + module procedure stdlib_zungtsqr +#endif + end interface ungtsqr + + + + !> UNGTSQR_ROW: generates an M-by-N complex matrix Q_out with + !> orthonormal columns from the output of CLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by CLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of CLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine CLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which CLATSQR generates the output blocks. + interface ungtsqr_row +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(*) + end subroutine cungtsqr_row +#else + module procedure stdlib_cungtsqr_row +#endif +#:if WITH_QP + module procedure stdlib_wungtsqr_row +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,lwork,m,n,mb,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(*) + end subroutine zungtsqr_row +#else + module procedure stdlib_zungtsqr_row +#endif + end interface ungtsqr_row + + + + !> UNHR_COL: takes an M-by-N complex matrix Q_in with orthonormal columns + !> as input, stored in A, and performs Householder Reconstruction (HR), + !> i.e. reconstructs Householder vectors V(i) implicitly representing + !> another M-by-N matrix Q_out, with the property that Q_in = Q_out*S, + !> where S is an N-by-N diagonal matrix with diagonal entries + !> equal to +1 or -1. The Householder vectors (columns V(i) of V) are + !> stored in A on output, and the diagonal entries of S are stored in D. + !> Block reflectors are also returned in T + !> (same output format as CGEQRT). + interface unhr_col +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunhr_col( m, n, nb, a, lda, t, ldt, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*),t(ldt,*) + end subroutine cunhr_col +#else + module procedure stdlib_cunhr_col +#endif +#:if WITH_QP + module procedure stdlib_wunhr_col +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunhr_col( m, n, nb, a, lda, t, ldt, d, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldt,m,n,nb + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*),t(ldt,*) + end subroutine zunhr_col +#else + module procedure stdlib_zunhr_col +#endif + end interface unhr_col + + + + !> UNM2L: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + interface unm2l +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunm2l +#else + module procedure stdlib_cunm2l +#endif +#:if WITH_QP + module procedure stdlib_wunm2l +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunm2l +#else + module procedure stdlib_zunm2l +#endif + end interface unm2l + + + + !> UNM2R: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + interface unm2r +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunm2r +#else + module procedure stdlib_cunm2r +#endif +#:if WITH_QP + module procedure stdlib_wunm2r +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunm2r +#else + module procedure stdlib_zunm2r +#endif + end interface unm2r + + + + !> If VECT = 'Q', UNMBR: overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> If VECT = 'P', UNMBR overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'C': P**H * C C * P**H + !> Here Q and P**H are the unitary matrices determined by CGEBRD when + !> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + !> and P**H are defined as products of elementary reflectors H(i) and + !> G(i) respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the unitary matrix Q or P**H that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + interface unmbr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmbr +#else + module procedure stdlib_cunmbr +#endif +#:if WITH_QP + module procedure stdlib_wunmbr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmbr +#else + module procedure stdlib_zunmbr +#endif + end interface unmbr + + + + !> UNMHR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> IHI-ILO elementary reflectors, as returned by CGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + interface unmhr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(in) :: ihi,ilo,lda,ldc,lwork,m,n + integer(ilp), intent(out) :: info + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmhr +#else + module procedure stdlib_cunmhr +#endif +#:if WITH_QP + module procedure stdlib_wunmhr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, & + lwork, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(in) :: ihi,ilo,lda,ldc,lwork,m,n + integer(ilp), intent(out) :: info + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmhr +#else + module procedure stdlib_zunmhr +#endif + end interface unmhr + + + + !> UNMLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface unmlq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmlq +#else + module procedure stdlib_cunmlq +#endif +#:if WITH_QP + module procedure stdlib_wunmlq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmlq +#else + module procedure stdlib_zunmlq +#endif + end interface unmlq + + + + !> UNMQL: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface unmql +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmql +#else + module procedure stdlib_cunmql +#endif +#:if WITH_QP + module procedure stdlib_wunmql +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmql +#else + module procedure stdlib_zunmql +#endif + end interface unmql + + + + !> UNMQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface unmqr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmqr +#else + module procedure stdlib_cunmqr +#endif +#:if WITH_QP + module procedure stdlib_wunmqr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmqr +#else + module procedure stdlib_zunmqr +#endif + end interface unmqr + + + + !> UNMRQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface unmrq +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmrq +#else + module procedure stdlib_cunmrq +#endif +#:if WITH_QP + module procedure stdlib_wunmrq +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmrq +#else + module procedure stdlib_zunmrq +#endif + end interface unmrq + + + + !> UNMRZ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + interface unmrz +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,l,lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmrz +#else + module procedure stdlib_cunmrz +#endif +#:if WITH_QP + module procedure stdlib_wunmrz +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k,l,lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmrz +#else + module procedure stdlib_zunmrz +#endif + end interface unmrz + + + + !> UNMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by CHETRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + interface unmtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cunmtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldc,lwork,m,n + complex(sp), intent(inout) :: a(lda,*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cunmtr +#else + module procedure stdlib_cunmtr +#endif +#:if WITH_QP + module procedure stdlib_wunmtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zunmtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda,ldc,lwork,m,n + complex(dp), intent(inout) :: a(lda,*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zunmtr +#else + module procedure stdlib_zunmtr +#endif + end interface unmtr + + + + !> UPGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> CHPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + interface upgtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cupgtr( uplo, n, ap, tau, q, ldq, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,n + complex(sp), intent(in) :: ap(*),tau(*) + complex(sp), intent(out) :: q(ldq,*),work(*) + end subroutine cupgtr +#else + module procedure stdlib_cupgtr +#endif +#:if WITH_QP + module procedure stdlib_wupgtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zupgtr( uplo, n, ap, tau, q, ldq, work, info ) + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq,n + complex(dp), intent(in) :: ap(*),tau(*) + complex(dp), intent(out) :: q(ldq,*),work(*) + end subroutine zupgtr +#else + module procedure stdlib_zupgtr +#endif + end interface upgtr + + + + !> UPMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by CHPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + interface upmtr +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine cupmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,m,n + complex(sp), intent(inout) :: ap(*),c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + end subroutine cupmtr +#else + module procedure stdlib_cupmtr +#endif +#:if WITH_QP + module procedure stdlib_wupmtr +#:endif +#ifdef STDLIB_EXTERNAL_LAPACK + pure subroutine zupmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + + import sp,dp,qp,ilp,lk + implicit none(type,external) + character, intent(in) :: side,trans,uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc,m,n + complex(dp), intent(inout) :: ap(*),c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + end subroutine zupmtr +#else + module procedure stdlib_zupmtr +#endif + end interface upmtr + + + + + +end module stdlib_linalg_lapack diff --git a/src/stdlib_linalg_lapack_aux.fypp b/src/stdlib_linalg_lapack_aux.fypp new file mode 100644 index 000000000..f01fb9df8 --- /dev/null +++ b/src/stdlib_linalg_lapack_aux.fypp @@ -0,0 +1,1961 @@ +#:include "common.fypp" +module stdlib_linalg_lapack_aux + use stdlib_linalg_constants + use stdlib_linalg_blas + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_chla_transtype + public :: stdlib_droundup_lwork + public :: stdlib_icmax1 + public :: stdlib_ieeeck + public :: stdlib_ilaclc + public :: stdlib_ilaclr + public :: stdlib_iladiag + public :: stdlib_iladlc + public :: stdlib_iladlr + public :: stdlib_ilaenv + public :: stdlib_ilaenv2stage + public :: stdlib_ilaprec + public :: stdlib_ilaslc + public :: stdlib_ilaslr + public :: stdlib_ilatrans + public :: stdlib_ilauplo + public :: stdlib_ilazlc + public :: stdlib_ilazlr + public :: stdlib_iparam2stage + public :: stdlib_iparmq + public :: stdlib_izmax1 + public :: stdlib_lsamen + public :: stdlib_sroundup_lwork + public :: stdlib_xerbla + public :: stdlib_xerbla_array +#:if WITH_QP + public :: stdlib_qroundup_lwork +#:endif +#:if WITH_QP + public :: stdlib_ilaqiag +#:endif +#:if WITH_QP + public :: stdlib_ilaqlc +#:endif +#:if WITH_QP + public :: stdlib_ilaqlr +#:endif +#:if WITH_QP + public :: stdlib_ilawlc +#:endif +#:if WITH_QP + public :: stdlib_ilawlr +#:endif +#:if WITH_QP + public :: stdlib_iwmax1 +#:endif + public :: stdlib_selctg_s + public :: stdlib_select_s + public :: stdlib_selctg_d + public :: stdlib_select_d +#:if WITH_QP + public :: stdlib_selctg_q + public :: stdlib_select_q +#:endif + public :: stdlib_selctg_c + public :: stdlib_select_c + public :: stdlib_selctg_z + public :: stdlib_select_z +#:if WITH_QP + public :: stdlib_selctg_w + public :: stdlib_select_w +#:endif + ! SELCTG is a LOGICAL FUNCTION of three DOUBLE PRECISION arguments + ! used to select eigenvalues to sort to the top left of the Schur form. + ! An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if SELCTG is true, i.e., + abstract interface + pure logical(lk) function stdlib_selctg_s(alphar,alphai,beta) + import sp,dp,qp,lk + implicit none + real(sp), intent(in) :: alphar,alphai,beta + end function stdlib_selctg_s + pure logical(lk) function stdlib_select_s(alphar,alphai) + import sp,dp,qp,lk + implicit none + real(sp), intent(in) :: alphar,alphai + end function stdlib_select_s + pure logical(lk) function stdlib_selctg_d(alphar,alphai,beta) + import sp,dp,qp,lk + implicit none + real(dp), intent(in) :: alphar,alphai,beta + end function stdlib_selctg_d + pure logical(lk) function stdlib_select_d(alphar,alphai) + import sp,dp,qp,lk + implicit none + real(dp), intent(in) :: alphar,alphai + end function stdlib_select_d +#:if WITH_QP + pure logical(lk) function stdlib_selctg_q(alphar,alphai,beta) + import sp,dp,qp,lk + implicit none + real(qp), intent(in) :: alphar,alphai,beta + end function stdlib_selctg_q + pure logical(lk) function stdlib_select_q(alphar,alphai) + import sp,dp,qp,lk + implicit none + real(qp), intent(in) :: alphar,alphai + end function stdlib_select_q +#:endif + pure logical(lk) function stdlib_selctg_c(alpha,beta) + import sp,dp,qp,lk + implicit none + complex(sp), intent(in) :: alpha,beta + end function stdlib_selctg_c + pure logical(lk) function stdlib_select_c(alpha) + import sp,dp,qp,lk + implicit none + complex(sp), intent(in) :: alpha + end function stdlib_select_c + pure logical(lk) function stdlib_selctg_z(alpha,beta) + import sp,dp,qp,lk + implicit none + complex(dp), intent(in) :: alpha,beta + end function stdlib_selctg_z + pure logical(lk) function stdlib_select_z(alpha) + import sp,dp,qp,lk + implicit none + complex(dp), intent(in) :: alpha + end function stdlib_select_z +#:if WITH_QP + pure logical(lk) function stdlib_selctg_w(alpha,beta) + import sp,dp,qp,lk + implicit none + complex(qp), intent(in) :: alpha,beta + end function stdlib_selctg_w + pure logical(lk) function stdlib_select_w(alpha) + import sp,dp,qp,lk + implicit none + complex(qp), intent(in) :: alpha + end function stdlib_select_w +#:endif + end interface + + + + contains + + !> This subroutine translates from a BLAST-specified integer constant to + !> the character string specifying a transposition operation. + !> CHLA_TRANSTYPE: returns an CHARACTER*1. If CHLA_TRANSTYPE: is 'X', + !> then input is not an integer indicating a transposition operator. + !> Otherwise CHLA_TRANSTYPE returns the constant value corresponding to + !> TRANS. + + pure character function stdlib_chla_transtype( trans ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: trans + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_no_trans = 111 + integer(ilp), parameter :: blas_trans = 112 + integer(ilp), parameter :: blas_conj_trans = 113 + + ! Executable Statements + if( trans==blas_no_trans ) then + stdlib_chla_transtype = 'N' + else if( trans==blas_trans ) then + stdlib_chla_transtype = 'T' + else if( trans==blas_conj_trans ) then + stdlib_chla_transtype = 'C' + else + stdlib_chla_transtype = 'X' + end if + return + end function stdlib_chla_transtype + + !> DROUNDUP_LWORK: deals with a subtle bug with returning LWORK as a Float. + !> This routine guarantees it is rounded up instead of down by + !> multiplying LWORK by 1+eps when it is necessary, where eps is the relative machine precision. + !> E.g., + !> float( 9007199254740993 ) == 9007199254740992 + !> float( 9007199254740993 ) * (1.+eps) == 9007199254740994 + !> \return DROUNDUP_LWORK + !> + !> DROUNDUP_LWORK >= LWORK. + !> DROUNDUP_LWORK is guaranteed to have zero decimal part. + + pure real(dp) function stdlib_droundup_lwork( lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lwork + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: epsilon,real,int + ! Executable Statements + stdlib_droundup_lwork = real( lwork,KIND=dp) + if( int( stdlib_droundup_lwork,KIND=ilp) < lwork ) then + ! force round up of lwork + stdlib_droundup_lwork = stdlib_droundup_lwork * ( 1.0e+0_dp + epsilon(0.0e+0_dp) ) + + endif + return + end function stdlib_droundup_lwork + + !> ICMAX1: finds the index of the first vector element of maximum absolute value. + !> Based on ICAMAX from Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure integer(ilp) function stdlib_icmax1( n, cx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: smax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_icmax1 = 0 + if (n<1 .or. incx<=0) return + stdlib_icmax1 = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + smax = abs(cx(1)) + do i = 2,n + if (abs(cx(i))>smax) then + stdlib_icmax1 = i + smax = abs(cx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + smax = abs(cx(1)) + ix = ix + incx + do i = 2,n + if (abs(cx(ix))>smax) then + stdlib_icmax1 = i + smax = abs(cx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_icmax1 + + !> IEEECK: is called from the ILAENV to verify that Infinity and + !> possibly NaN arithmetic is safe (i.e. will not trap). + + pure integer(ilp) function stdlib_ieeeck( ispec, zero, one ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ispec + real(sp), intent(in) :: one, zero + ! ===================================================================== + ! Local Scalars + real(sp) :: nan1, nan2, nan3, nan4, nan5, nan6, neginf, negzro, newzro, posinf + ! Executable Statements + stdlib_ieeeck = 1 + posinf = one / zero + if( posinf<=one ) then + stdlib_ieeeck = 0 + return + end if + neginf = -one / zero + if( neginf>=zero ) then + stdlib_ieeeck = 0 + return + end if + negzro = one / ( neginf+one ) + if( negzro/=zero ) then + stdlib_ieeeck = 0 + return + end if + neginf = one / negzro + if( neginf>=zero ) then + stdlib_ieeeck = 0 + return + end if + newzro = negzro + zero + if( newzro/=zero ) then + stdlib_ieeeck = 0 + return + end if + posinf = one / newzro + if( posinf<=one ) then + stdlib_ieeeck = 0 + return + end if + neginf = neginf*posinf + if( neginf>=zero ) then + stdlib_ieeeck = 0 + return + end if + posinf = posinf*posinf + if( posinf<=one ) then + stdlib_ieeeck = 0 + return + end if + ! return if we were only asked to check infinity arithmetic + if( ispec==0 )return + nan1 = posinf + neginf + nan2 = posinf / neginf + nan3 = posinf / posinf + nan4 = posinf*zero + nan5 = neginf*negzro + nan6 = nan5*zero + if( nan1==nan1 ) then + stdlib_ieeeck = 0 + return + end if + if( nan2==nan2 ) then + stdlib_ieeeck = 0 + return + end if + if( nan3==nan3 ) then + stdlib_ieeeck = 0 + return + end if + if( nan4==nan4 ) then + stdlib_ieeeck = 0 + return + end if + if( nan5==nan5 ) then + stdlib_ieeeck = 0 + return + end if + if( nan6==nan6 ) then + stdlib_ieeeck = 0 + return + end if + return + end function stdlib_ieeeck + + !> ILACLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_ilaclc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(sp), parameter :: zero = (0.0e+0,0.0e+0) + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_ilaclc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaclc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_ilaclc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_ilaclc)/=zero ) return + end do + end do + end if + return + end function stdlib_ilaclc + + !> ILACLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_ilaclr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(sp), parameter :: zero = (0.0e+0,0.0e+0) + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_ilaclr = m + else if( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaclr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_ilaclr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_ilaclr = max( stdlib_ilaclr, i ) + end do + end if + return + end function stdlib_ilaclr + + !> This subroutine translated from a character string specifying if a + !> matrix has unit diagonal or not to the relevant BLAST-specified + !> integer constant. + !> ILADIAG: returns an INTEGER. If ILADIAG: < 0, then the input is not a + !> character indicating a unit or non-unit diagonal. Otherwise ILADIAG + !> returns the constant value corresponding to DIAG. + + integer(ilp) function stdlib_iladiag( diag ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character :: diag + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_non_unit_diag = 131 + integer(ilp), parameter :: blas_unit_diag = 132 + + ! Executable Statements + if( stdlib_lsame( diag, 'N' ) ) then + stdlib_iladiag = blas_non_unit_diag + else if( stdlib_lsame( diag, 'U' ) ) then + stdlib_iladiag = blas_unit_diag + else + stdlib_iladiag = -1 + end if + return + end function stdlib_iladiag + + !> ILADLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_iladlc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: zero = 0.0d+0 + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_iladlc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_iladlc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_iladlc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_iladlc)/=zero ) return + end do + end do + end if + return + end function stdlib_iladlc + + !> ILADLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_iladlr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: zero = 0.0d+0 + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_iladlr = m + else if( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_iladlr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_iladlr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_iladlr = max( stdlib_iladlr, i ) + end do + end if + return + end function stdlib_iladlr + + !> This subroutine translated from a character string specifying an + !> intermediate precision to the relevant BLAST-specified integer + !> constant. + !> ILAPREC: returns an INTEGER. If ILAPREC: < 0, then the input is not a + !> character indicating a supported intermediate precision. Otherwise + !> ILAPREC returns the constant value corresponding to PREC. + + integer(ilp) function stdlib_ilaprec( prec ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character :: prec + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_prec_single = 211 + integer(ilp), parameter :: blas_prec_double = 212 + integer(ilp), parameter :: blas_prec_indigenous = 213 + integer(ilp), parameter :: blas_prec_extra = 214 + + ! Executable Statements + if( stdlib_lsame( prec, 'S' ) ) then + stdlib_ilaprec = blas_prec_single + else if( stdlib_lsame( prec, 'D' ) ) then + stdlib_ilaprec = blas_prec_double + else if( stdlib_lsame( prec, 'I' ) ) then + stdlib_ilaprec = blas_prec_indigenous + else if( stdlib_lsame( prec, 'X' ) .or. stdlib_lsame( prec, 'E' ) ) then + stdlib_ilaprec = blas_prec_extra + else + stdlib_ilaprec = -1 + end if + return + end function stdlib_ilaprec + + !> ILASLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_ilaslc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: zero = 0.0e+0 + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_ilaslc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaslc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_ilaslc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_ilaslc)/=zero ) return + end do + end do + end if + return + end function stdlib_ilaslc + + !> ILASLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_ilaslr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: zero = 0.0e+0 + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_ilaslr = m + elseif( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaslr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_ilaslr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_ilaslr = max( stdlib_ilaslr, i ) + end do + end if + return + end function stdlib_ilaslr + + !> This subroutine translates from a character string specifying a + !> transposition operation to the relevant BLAST-specified integer + !> constant. + !> ILATRANS: returns an INTEGER. If ILATRANS: < 0, then the input is not + !> a character indicating a transposition operator. Otherwise ILATRANS + !> returns the constant value corresponding to TRANS. + + integer(ilp) function stdlib_ilatrans( trans ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character :: trans + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_no_trans = 111 + integer(ilp), parameter :: blas_trans = 112 + integer(ilp), parameter :: blas_conj_trans = 113 + + ! Executable Statements + if( stdlib_lsame( trans, 'N' ) ) then + stdlib_ilatrans = blas_no_trans + else if( stdlib_lsame( trans, 'T' ) ) then + stdlib_ilatrans = blas_trans + else if( stdlib_lsame( trans, 'C' ) ) then + stdlib_ilatrans = blas_conj_trans + else + stdlib_ilatrans = -1 + end if + return + end function stdlib_ilatrans + + !> This subroutine translated from a character string specifying a + !> upper- or lower-triangular matrix to the relevant BLAST-specified + !> integer constant. + !> ILAUPLO: returns an INTEGER. If ILAUPLO: < 0, then the input is not + !> a character indicating an upper- or lower-triangular matrix. + !> Otherwise ILAUPLO returns the constant value corresponding to UPLO. + + integer(ilp) function stdlib_ilauplo( uplo ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character :: uplo + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_upper = 121 + integer(ilp), parameter :: blas_lower = 122 + + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + stdlib_ilauplo = blas_upper + else if( stdlib_lsame( uplo, 'L' ) ) then + stdlib_ilauplo = blas_lower + else + stdlib_ilauplo = -1 + end if + return + end function stdlib_ilauplo + + !> ILAZLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_ilazlc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(dp), parameter :: zero = (0.0d+0,0.0d+0) + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_ilazlc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_ilazlc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_ilazlc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_ilazlc)/=zero ) return + end do + end do + end if + return + end function stdlib_ilazlc + + !> ILAZLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_ilazlr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(dp), parameter :: zero = (0.0d+0,0.0d+0) + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_ilazlr = m + else if( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_ilazlr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_ilazlr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_ilazlr = max( stdlib_ilazlr, i ) + end do + end if + return + end function stdlib_ilazlr + + !> This program sets problem and machine dependent parameters + !> useful for xHSEQR and related subroutines for eigenvalue + !> problems. It is called whenever + !> IPARMQ: is called with 12 <= ISPEC <= 16 + + pure integer(ilp) function stdlib_iparmq( ispec, name, opts, n, ilo, ihi, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ispec, lwork, n + character, intent(in) :: name*(*), opts*(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: inmin = 12 + integer(ilp), parameter :: inwin = 13 + integer(ilp), parameter :: inibl = 14 + integer(ilp), parameter :: ishfts = 15 + integer(ilp), parameter :: iacc22 = 16 + integer(ilp), parameter :: icost = 17 + integer(ilp), parameter :: nmin = 75 + integer(ilp), parameter :: k22min = 14 + integer(ilp), parameter :: kacmin = 14 + integer(ilp), parameter :: nibble = 14 + integer(ilp), parameter :: knwswp = 500 + integer(ilp), parameter :: rcost = 10 + real(sp), parameter :: two = 2.0 + + + + ! Local Scalars + integer(ilp) :: nh, ns + integer(ilp) :: i, ic, iz + character :: subnam*6 + ! Intrinsic Functions + intrinsic :: log,max,mod,nint,real + ! Executable Statements + if( ( ispec==ishfts ) .or. ( ispec==inwin ) .or.( ispec==iacc22 ) ) then + ! ==== set the number simultaneous shifts ==== + nh = ihi - ilo + 1 + ns = 2 + if( nh>=30 )ns = 4 + if( nh>=60 )ns = 10 + if( nh>=150 )ns = max( 10, nh / nint( log( real( nh,KIND=dp) ) / log( two ),& + KIND=ilp) ) + if( nh>=590 )ns = 64 + if( nh>=3000 )ns = 128 + if( nh>=6000 )ns = 256 + ns = max( 2, ns-mod( ns, 2 ) ) + end if + if( ispec==inmin ) then + ! ===== matrices of order smaller than nmin get sent + ! . to xlahqr, the classic double shift algorithm. + ! . this must be at least 11. ==== + stdlib_iparmq = nmin + else if( ispec==inibl ) then + ! ==== inibl: skip a multi-shift qr iteration and + ! . whenever aggressive early deflation finds + ! . at least (nibble*(window size)/100) deflations. ==== + stdlib_iparmq = nibble + else if( ispec==ishfts ) then + ! ==== nshfts: the number of simultaneous shifts ===== + stdlib_iparmq = ns + else if( ispec==inwin ) then + ! ==== nw: deflation window size. ==== + if( nh<=knwswp ) then + stdlib_iparmq = ns + else + stdlib_iparmq = 3*ns / 2 + end if + else if( ispec==iacc22 ) then + ! ==== iacc22: whether to accumulate reflections + ! . before updating the far-from-diagonal elements + ! . and whether to use 2-by-2 block structure while + ! . doing it. a small amount of work could be saved + ! . by making this choice dependent also upon the + ! . nh=ihi-ilo+1. + ! convert name to upper case if the first character is lower case. + stdlib_iparmq = 0 + subnam = name + ic = ichar( subnam( 1: 1 ) ) + iz = ichar( 'Z' ) + if( iz==90 .or. iz==122 ) then + ! ascii character set + if( ic>=97 .and. ic<=122 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ic>=97 .and. ic<=122 )subnam( i: i ) = char( ic-32 ) + end do + end if + else if( iz==233 .or. iz==169 ) then + ! ebcdic character set + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 .and. & + ic<=169 ) ) then + subnam( 1: 1 ) = char( ic+64 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 & + .and. ic<=169 ) )subnam( i:i ) = char( ic+64 ) + end do + end if + else if( iz==218 .or. iz==250 ) then + ! prime machines: ascii+128 + if( ic>=225 .and. ic<=250 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ic>=225 .and. ic<=250 )subnam( i: i ) = char( ic-32 ) + end do + end if + end if + if( subnam( 2:6 )=='GGHRD' .or.subnam( 2:6 )=='GGHD3' ) then + stdlib_iparmq = 1 + if( nh>=k22min )stdlib_iparmq = 2 + else if ( subnam( 4:6 )=='EXC' ) then + if( nh>=kacmin )stdlib_iparmq = 1 + if( nh>=k22min )stdlib_iparmq = 2 + else if ( subnam( 2:6 )=='HSEQR' .or.subnam( 2:5 )=='LAQR' ) then + if( ns>=kacmin )stdlib_iparmq = 1 + if( ns>=k22min )stdlib_iparmq = 2 + end if + else if( ispec==icost ) then + ! === relative cost of near-the-diagonal chase vs + ! blas updates === + stdlib_iparmq = rcost + else + ! ===== invalid value of ispec ===== + stdlib_iparmq = -1 + end if + end function stdlib_iparmq + + !> IZMAX1: finds the index of the first vector element of maximum absolute value. + !> Based on IZAMAX from Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure integer(ilp) function stdlib_izmax1( n, zx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: dmax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_izmax1 = 0 + if (n<1 .or. incx<=0) return + stdlib_izmax1 = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = abs(zx(1)) + do i = 2,n + if (abs(zx(i))>dmax) then + stdlib_izmax1 = i + dmax = abs(zx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = abs(zx(1)) + ix = ix + incx + do i = 2,n + if (abs(zx(ix))>dmax) then + stdlib_izmax1 = i + dmax = abs(zx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_izmax1 + + !> LSAMEN: tests if the first N letters of CA are the same as the + !> first N letters of CB, regardless of case. + !> LSAMEN returns .TRUE. if CA and CB are equivalent except for case + !> and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) + !> or LEN( CB ) is less than N. + + pure logical(lk) function stdlib_lsamen( n, ca, cb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character(len=*), intent(in) :: ca, cb + integer(ilp), intent(in) :: n + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: len + ! Executable Statements + stdlib_lsamen = .false. + if( len( ca ) SROUNDUP_LWORK: deals with a subtle bug with returning LWORK as a Float. + !> This routine guarantees it is rounded up instead of down by + !> multiplying LWORK by 1+eps when it is necessary, where eps is the relative machine precision. + !> E.g., + !> float( 16777217 ) == 16777216 + !> float( 16777217 ) * (1.+eps) == 16777218 + !> \return SROUNDUP_LWORK + !> + !> SROUNDUP_LWORK >= LWORK. + !> SROUNDUP_LWORK is guaranteed to have zero decimal part. + + pure real(sp) function stdlib_sroundup_lwork( lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lwork + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: epsilon,real,int + ! Executable Statements + stdlib_sroundup_lwork = real( lwork,KIND=sp) + if( int( stdlib_sroundup_lwork,KIND=ilp) < lwork ) then + ! force round up of lwork + stdlib_sroundup_lwork = stdlib_sroundup_lwork * ( 1.0e+0_sp + epsilon(0.0e+0_sp) ) + + endif + return + end function stdlib_sroundup_lwork + +#:if WITH_QP + + !> DROUNDUP_LWORK: deals with a subtle bug with returning LWORK as a Float. + !> This routine guarantees it is rounded up instead of down by + !> multiplying LWORK by 1+eps when it is necessary, where eps is the relative machine precision. + !> E.g., + !> float( 9007199254740993 ) == 9007199254740992 + !> float( 9007199254740993 ) * (1.+eps) == 9007199254740994 + !> \return DROUNDUP_LWORK + !> + !> DROUNDUP_LWORK >= LWORK. + !> DROUNDUP_LWORK is guaranteed to have zero decimal part. + + pure real(qp) function stdlib_qroundup_lwork( lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lwork + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: epsilon,real,int + ! Executable Statements + stdlib_qroundup_lwork = real( lwork,KIND=qp) + if( int( stdlib_qroundup_lwork,KIND=ilp) < lwork ) then + ! force round up of lwork + stdlib_qroundup_lwork = stdlib_qroundup_lwork * ( 1.0e+0_qp + epsilon(0.0e+0_qp) ) + + endif + return + end function stdlib_qroundup_lwork +#:endif + +#:if WITH_QP + + !> This subroutine translated from a character string specifying if a + !> matrix has unit diagonal or not to the relevant BLAST-specified + !> integer constant. + !> ILADIAG: returns an INTEGER. If ILADIAG: < 0, then the input is not a + !> character indicating a unit or non-unit diagonal. Otherwise ILADIAG + !> returns the constant value corresponding to DIAG. + + integer(ilp) function stdlib_ilaqiag( diag ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character :: diag + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blas_non_unit_qiag = 131 + integer(ilp), parameter :: blas_unit_qiag = 132 + + ! Executable Statements + if( stdlib_lsame( diag, 'N' ) ) then + stdlib_ilaqiag = blas_non_unit_qiag + else if( stdlib_lsame( diag, 'U' ) ) then + stdlib_ilaqiag = blas_unit_qiag + else + stdlib_ilaqiag = -1 + end if + return + end function stdlib_ilaqiag +#:endif + +#:if WITH_QP + + !> ILADLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_ilaqlc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: zero = 0.0d+0 + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_ilaqlc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaqlc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_ilaqlc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_ilaqlc)/=zero ) return + end do + end do + end if + return + end function stdlib_ilaqlc +#:endif + +#:if WITH_QP + + !> ILADLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_ilaqlr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: zero = 0.0d+0 + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_ilaqlr = m + else if( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_ilaqlr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_ilaqlr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_ilaqlr = max( stdlib_ilaqlr, i ) + end do + end if + return + end function stdlib_ilaqlr +#:endif + +#:if WITH_QP + + !> ILAZLC: scans A for its last non-zero column. + + pure integer(ilp) function stdlib_ilawlc( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(qp), parameter :: zero = (0.0d+0,0.0d+0) + + ! Local Scalars + integer(ilp) :: i + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( n==0 ) then + stdlib_ilawlc = n + else if( a(1, n)/=zero .or. a(m, n)/=zero ) then + stdlib_ilawlc = n + else + ! now scan each column from the end, returning with the first non-zero. + do stdlib_ilawlc = n, 1, -1 + do i = 1, m + if( a(i, stdlib_ilawlc)/=zero ) return + end do + end do + end if + return + end function stdlib_ilawlc +#:endif + +#:if WITH_QP + + !> ILAZLR: scans A for its last non-zero row. + + pure integer(ilp) function stdlib_ilawlr( m, n, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: m, n, lda + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Parameters + complex(qp), parameter :: zero = (0.0d+0,0.0d+0) + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick test for the common case where one corner is non-zero. + if( m==0 ) then + stdlib_ilawlr = m + else if( a(m, 1)/=zero .or. a(m, n)/=zero ) then + stdlib_ilawlr = m + else + ! scan up each column tracking the last zero row seen. + stdlib_ilawlr = 0 + do j = 1, n + i=m + do while((a(max(i,1),j)==zero).and.(i>=1)) + i=i-1 + enddo + stdlib_ilawlr = max( stdlib_ilawlr, i ) + end do + end if + return + end function stdlib_ilawlr +#:endif + +#:if WITH_QP + + !> IZMAX1: finds the index of the first vector element of maximum absolute value. + !> Based on IZAMAX from Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure integer(ilp) function stdlib_iwmax1( n, zx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(in) :: zx(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: dmax + integer(ilp) :: i, ix + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_iwmax1 = 0 + if (n<1 .or. incx<=0) return + stdlib_iwmax1 = 1 + if (n==1) return + if (incx==1) then + ! code for increment equal to 1 + dmax = abs(zx(1)) + do i = 2,n + if (abs(zx(i))>dmax) then + stdlib_iwmax1 = i + dmax = abs(zx(i)) + end if + end do + else + ! code for increment not equal to 1 + ix = 1 + dmax = abs(zx(1)) + ix = ix + incx + do i = 2,n + if (abs(zx(ix))>dmax) then + stdlib_iwmax1 = i + dmax = abs(zx(ix)) + end if + ix = ix + incx + end do + end if + return + end function stdlib_iwmax1 +#:endif + + !> ILAENV: is called from the LAPACK routines to choose problem-dependent + !> parameters for the local environment. See ISPEC for a description of + !> the parameters. + !> ILAENV returns an INTEGER + !> if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC + !> if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. + !> This version provides a set of parameters which should give good, + !> but not optimal, performance on many of the currently available + !> computers. Users are encouraged to modify this subroutine to set + !> the tuning parameters for their particular machine using the option + !> and problem size information in the arguments. + !> This routine will not function correctly if it is converted to all + !> lower case. Converting it to all upper case is allowed. + + pure integer(ilp) function stdlib_ilaenv( ispec, name, opts, n1, n2, n3, n4 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character(len=*), intent(in) :: name, opts + integer(ilp), intent(in) :: ispec, n1, n2, n3, n4 + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, iz, nb, nbmin, nx + logical(lk) :: cname, sname, twostage + character :: c1*1, c2*2, c4*2, c3*3, subnam*16 + ! Intrinsic Functions + intrinsic :: char,ichar,int,min,real + ! Executable Statements + go to ( 10, 10, 10, 80, 90, 100, 110, 120,130, 140, 150, 160, 160, 160, 160, 160, 160)& + ispec + ! invalid value for ispec + stdlib_ilaenv = -1 + return + 10 continue + ! convert name to upper case if the first character is lower case. + stdlib_ilaenv = 1 + subnam = name + ic = ichar( subnam( 1: 1 ) ) + iz = ichar( 'Z' ) + if( iz==90 .or. iz==122 ) then + ! ascii character set + if( ic>=97 .and. ic<=122 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ic>=97 .and. ic<=122 )subnam( i: i ) = char( ic-32 ) + end do + end if + else if( iz==233 .or. iz==169 ) then + ! ebcdic character set + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 .and. & + ic<=169 ) ) then + subnam( 1: 1 ) = char( ic+64 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 & + .and. ic<=169 ) )subnam( i:i ) = char( ic+64 ) + end do + end if + else if( iz==218 .or. iz==250 ) then + ! prime machines: ascii+128 + if( ic>=225 .and. ic<=250 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 6 + ic = ichar( subnam( i: i ) ) + if( ic>=225 .and. ic<=250 )subnam( i: i ) = char( ic-32 ) + end do + end if + end if + c1 = subnam( 1: 1 ) + sname = c1=='S' .or. c1=='D' + cname = c1=='C' .or. c1=='Z' + if( .not.( cname .or. sname ) )return + c2 = subnam( 2: 3 ) + c3 = subnam( 4: 6 ) + c4 = c3( 2: 3 ) + twostage = len( subnam )>=11.and. subnam( 11: 11 )=='2' + go to ( 50, 60, 70 )ispec + 50 continue + ! ispec = 1: block size + ! in these examples, separate code is provided for setting nb for + ! real and complex. we assume that nb will take the same value in + ! single or double precision. + nb = 1 + if( subnam(2:6)=='LAORH' ) then + ! this is for *laorhr_getrfnp routine + if( sname ) then + nb = 32 + else + nb = 32 + end if + else if( c2=='GE' ) then + if( c3=='TRF' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + else if( c3=='QRF' .or. c3=='RQF' .or. c3=='LQF' .or.c3=='QLF' ) then + if( sname ) then + nb = 32 + else + nb = 32 + end if + else if( c3=='QR ') then + if( n3 == 1) then + if( sname ) then + ! m*n + if ((n1*n2<=131072).or.(n1<=8192)) then + nb = n1 + else + nb = 32768/n2 + end if + else + if ((n1*n2<=131072).or.(n1<=8192)) then + nb = n1 + else + nb = 32768/n2 + end if + end if + else + if( sname ) then + nb = 1 + else + nb = 1 + end if + end if + else if( c3=='LQ ') then + if( n3 == 2) then + if( sname ) then + ! m*n + if ((n1*n2<=131072).or.(n1<=8192)) then + nb = n1 + else + nb = 32768/n2 + end if + else + if ((n1*n2<=131072).or.(n1<=8192)) then + nb = n1 + else + nb = 32768/n2 + end if + end if + else + if( sname ) then + nb = 1 + else + nb = 1 + end if + end if + else if( c3=='HRD' ) then + if( sname ) then + nb = 32 + else + nb = 32 + end if + else if( c3=='BRD' ) then + if( sname ) then + nb = 32 + else + nb = 32 + end if + else if( c3=='TRI' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + end if + else if( c2=='PO' ) then + if( c3=='TRF' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + end if + else if( c2=='SY' ) then + if( c3=='TRF' ) then + if( sname ) then + if( twostage ) then + nb = 192 + else + nb = 64 + end if + else + if( twostage ) then + nb = 192 + else + nb = 64 + end if + end if + else if( sname .and. c3=='TRD' ) then + nb = 32 + else if( sname .and. c3=='GST' ) then + nb = 64 + end if + else if( cname .and. c2=='HE' ) then + if( c3=='TRF' ) then + if( twostage ) then + nb = 192 + else + nb = 64 + end if + else if( c3=='TRD' ) then + nb = 32 + else if( c3=='GST' ) then + nb = 64 + end if + else if( sname .and. c2=='OR' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nb = 32 + end if + else if( c3( 1: 1 )=='M' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nb = 32 + end if + end if + else if( cname .and. c2=='UN' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nb = 32 + end if + else if( c3( 1: 1 )=='M' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nb = 32 + end if + end if + else if( c2=='GB' ) then + if( c3=='TRF' ) then + if( sname ) then + if( n4<=64 ) then + nb = 1 + else + nb = 32 + end if + else + if( n4<=64 ) then + nb = 1 + else + nb = 32 + end if + end if + end if + else if( c2=='PB' ) then + if( c3=='TRF' ) then + if( sname ) then + if( n2<=64 ) then + nb = 1 + else + nb = 32 + end if + else + if( n2<=64 ) then + nb = 1 + else + nb = 32 + end if + end if + end if + else if( c2=='TR' ) then + if( c3=='TRI' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + else if ( c3=='EVC' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + end if + else if( c2=='LA' ) then + if( c3=='UUM' ) then + if( sname ) then + nb = 64 + else + nb = 64 + end if + end if + else if( sname .and. c2=='ST' ) then + if( c3=='EBZ' ) then + nb = 1 + end if + else if( c2=='GG' ) then + nb = 32 + if( c3=='HD3' ) then + if( sname ) then + nb = 32 + else + nb = 32 + end if + end if + end if + stdlib_ilaenv = nb + return + 60 continue + ! ispec = 2: minimum block size + nbmin = 2 + if( c2=='GE' ) then + if( c3=='QRF' .or. c3=='RQF' .or. c3=='LQF' .or. c3=='QLF' ) then + if( sname ) then + nbmin = 2 + else + nbmin = 2 + end if + else if( c3=='HRD' ) then + if( sname ) then + nbmin = 2 + else + nbmin = 2 + end if + else if( c3=='BRD' ) then + if( sname ) then + nbmin = 2 + else + nbmin = 2 + end if + else if( c3=='TRI' ) then + if( sname ) then + nbmin = 2 + else + nbmin = 2 + end if + end if + else if( c2=='SY' ) then + if( c3=='TRF' ) then + if( sname ) then + nbmin = 8 + else + nbmin = 8 + end if + else if( sname .and. c3=='TRD' ) then + nbmin = 2 + end if + else if( cname .and. c2=='HE' ) then + if( c3=='TRD' ) then + nbmin = 2 + end if + else if( sname .and. c2=='OR' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nbmin = 2 + end if + else if( c3( 1: 1 )=='M' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nbmin = 2 + end if + end if + else if( cname .and. c2=='UN' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nbmin = 2 + end if + else if( c3( 1: 1 )=='M' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nbmin = 2 + end if + end if + else if( c2=='GG' ) then + nbmin = 2 + if( c3=='HD3' ) then + nbmin = 2 + end if + end if + stdlib_ilaenv = nbmin + return + 70 continue + ! ispec = 3: crossover point + nx = 0 + if( c2=='GE' ) then + if( c3=='QRF' .or. c3=='RQF' .or. c3=='LQF' .or. c3=='QLF' ) then + if( sname ) then + nx = 128 + else + nx = 128 + end if + else if( c3=='HRD' ) then + if( sname ) then + nx = 128 + else + nx = 128 + end if + else if( c3=='BRD' ) then + if( sname ) then + nx = 128 + else + nx = 128 + end if + end if + else if( c2=='SY' ) then + if( sname .and. c3=='TRD' ) then + nx = 32 + end if + else if( cname .and. c2=='HE' ) then + if( c3=='TRD' ) then + nx = 32 + end if + else if( sname .and. c2=='OR' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nx = 128 + end if + end if + else if( cname .and. c2=='UN' ) then + if( c3( 1: 1 )=='G' ) then + if( c4=='QR' .or. c4=='RQ' .or. c4=='LQ' .or. c4=='QL' .or. c4=='HR' .or. & + c4=='TR' .or. c4=='BR' )then + nx = 128 + end if + end if + else if( c2=='GG' ) then + nx = 128 + if( c3=='HD3' ) then + nx = 128 + end if + end if + stdlib_ilaenv = nx + return + 80 continue + ! ispec = 4: number of shifts (used by xhseqr) + stdlib_ilaenv = 6 + return + 90 continue + ! ispec = 5: minimum column dimension (not used) + stdlib_ilaenv = 2 + return + 100 continue + ! ispec = 6: crossover point for svd (used by xgelss and xgesvd) + stdlib_ilaenv = int( real( min( n1, n2 ),KIND=dp)*1.6e0,KIND=ilp) + return + 110 continue + ! ispec = 7: number of processors (not used) + stdlib_ilaenv = 1 + return + 120 continue + ! ispec = 8: crossover point for multishift (used by xhseqr) + stdlib_ilaenv = 50 + return + 130 continue + ! ispec = 9: maximum size of the subproblems at the bottom of the + ! computation tree in the divide-and-conquer algorithm + ! (used by xgelsd and xgesdd) + stdlib_ilaenv = 25 + return + 140 continue + ! ispec = 10: ieee and infinity nan arithmetic can be trusted not to trap + ! stdlib_ilaenv = 0 + stdlib_ilaenv = 1 + if( stdlib_ilaenv==1 ) then + stdlib_ilaenv = stdlib_ieeeck( 1, 0.0, 1.0 ) + end if + return + 150 continue + ! ispec = 11: ieee infinity arithmetic can be trusted not to trap + ! stdlib_ilaenv = 0 + stdlib_ilaenv = 1 + if( stdlib_ilaenv==1 ) then + stdlib_ilaenv = stdlib_ieeeck( 0, 0.0, 1.0 ) + end if + return + 160 continue + ! 12 <= ispec <= 17: xhseqr or related subroutines. + stdlib_ilaenv = stdlib_iparmq( ispec, name, opts, n1, n2, n3, n4 ) + return + end function stdlib_ilaenv + + !> This program sets problem and machine dependent parameters + !> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST, + !> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD + !> and related subroutines for eigenvalue problems. + !> It is called whenever ILAENV is called with 17 <= ISPEC <= 21. + !> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5 + !> with a direct conversion ISPEC + 16. + + pure integer(ilp) function stdlib_iparam2stage( ispec, name, opts,ni, nbi, ibi, nxi ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character(len=*), intent(in) :: name, opts + integer(ilp), intent(in) :: ispec, ni, nbi, ibi, nxi + ! ================================================================ + ! Local Scalars + integer(ilp) :: i, ic, iz, kd, ib, lhous, lwork, nthreads, factoptnb, qroptnb, & + lqoptnb + logical(lk) :: rprec, cprec + character :: prec*1, algo*3, stag*5, subnam*12, vect*1 + ! Intrinsic Functions + intrinsic :: char,ichar,max + ! Executable Statements + ! invalid value for ispec + if( (ispec<17).or.(ispec>21) ) then + stdlib_iparam2stage = -1 + return + endif + ! get the number of threads + nthreads = 1 + !$ nthreads = omp_get_num_threads() + + ! write(*,*) 'iparam voici nthreads ispec ',nthreads, ispec + if( ispec /= 19 ) then + ! convert name to upper case if the first character is lower case. + stdlib_iparam2stage = -1 + subnam = name + ic = ichar( subnam( 1: 1 ) ) + iz = ichar( 'Z' ) + if( iz==90 .or. iz==122 ) then + ! ascii character set + if( ic>=97 .and. ic<=122 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 12 + ic = ichar( subnam( i: i ) ) + if( ic>=97 .and. ic<=122 )subnam( i: i ) = char( ic-32 ) + end do + end if + else if( iz==233 .or. iz==169 ) then + ! ebcdic character set + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 .and. & + ic<=169 ) ) then + subnam( 1: 1 ) = char( ic+64 ) + do i = 2, 12 + ic = ichar( subnam( i: i ) ) + if( ( ic>=129 .and. ic<=137 ) .or.( ic>=145 .and. ic<=153 ) .or.( ic>=162 & + .and. ic<=169 ) )subnam( i:i ) = char( ic+64 ) + end do + end if + else if( iz==218 .or. iz==250 ) then + ! prime machines: ascii+128 + if( ic>=225 .and. ic<=250 ) then + subnam( 1: 1 ) = char( ic-32 ) + do i = 2, 12 + ic = ichar( subnam( i: i ) ) + if( ic>=225 .and. ic<=250 )subnam( i: i ) = char( ic-32 ) + end do + end if + end if + prec = subnam( 1: 1 ) + algo = subnam( 4: 6 ) + stag = subnam( 8:12 ) + rprec = prec=='S' .or. prec=='D' + cprec = prec=='C' .or. prec=='Z' + ! invalid value for precision + if( .not.( rprec .or. cprec ) ) then + stdlib_iparam2stage = -1 + return + endif + endif + ! write(*,*),'rprec,cprec ',rprec,cprec, + ! $ ' algo ',algo,' stage ',stag + if (( ispec == 17 ) .or. ( ispec == 18 )) then + ! ispec = 17, 18: block size kd, ib + ! could be also dependent from n but for now it + ! depend only on sequential or parallel + if( nthreads>4 ) then + if( cprec ) then + kd = 128 + ib = 32 + else + kd = 160 + ib = 40 + endif + else if( nthreads>1 ) then + if( cprec ) then + kd = 64 + ib = 32 + else + kd = 64 + ib = 32 + endif + else + if( cprec ) then + kd = 16 + ib = 16 + else + kd = 32 + ib = 16 + endif + endif + if( ispec==17 ) stdlib_iparam2stage = kd + if( ispec==18 ) stdlib_iparam2stage = ib + else if ( ispec == 19 ) then + ! ispec = 19: + ! lhous length of the houselholder representation + ! matrix (v,t) of the second stage. should be >= 1. + ! will add the vect option here next release + vect = opts(1:1) + if( vect=='N' ) then + lhous = max( 1, 4*ni ) + else + ! this is not correct, it need to call the algo and the stage2 + lhous = max( 1, 4*ni ) + ibi + endif + if( lhous>=0 ) then + stdlib_iparam2stage = lhous + else + stdlib_iparam2stage = -1 + endif + else if ( ispec == 20 ) then + ! ispec = 20: (21 for future use) + ! lwork length of the workspace for + ! either or both stages for trd and brd. should be >= 1. + ! trd: + ! trd_stage 1: = lt + lw + ls1 + ls2 + ! = ldt*kd + n*kd + n*max(kd,factoptnb) + lds2*kd + ! where ldt=lds2=kd + ! = n*kd + n*max(kd,factoptnb) + 2*kd*kd + ! trd_stage 2: = (2nb+1)*n + kd*nthreads + ! trd_both : = max(stage1,stage2) + ab ( ab=(kd+1)*n ) + ! = n*kd + n*max(kd+1,factoptnb) + ! + max(2*kd*kd, kd*nthreads) + ! + (kd+1)*n + lwork = -1 + subnam(1:1) = prec + subnam(2:6) = 'GEQRF' + qroptnb = stdlib_ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 ) + subnam(2:6) = 'GELQF' + lqoptnb = stdlib_ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 ) + ! could be qr or lq for trd and the max for brd + factoptnb = max(qroptnb, lqoptnb) + if( algo=='TRD' ) then + if( stag=='2STAG' ) then + lwork = ni*nbi + ni*max(nbi+1,factoptnb)+ max(2*nbi*nbi, nbi*nthreads)+ (nbi+& + 1)*ni + else if( (stag=='HE2HB').or.(stag=='SY2SB') ) then + lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi + else if( (stag=='HB2ST').or.(stag=='SB2ST') ) then + lwork = (2*nbi+1)*ni + nbi*nthreads + endif + else if( algo=='BRD' ) then + if( stag=='2STAG' ) then + lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)+ max(2*nbi*nbi, nbi*nthreads)+ (& + nbi+1)*ni + else if( stag=='GE2GB' ) then + lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi + else if( stag=='GB2BD' ) then + lwork = (3*nbi+1)*ni + nbi*nthreads + endif + endif + lwork = max ( 1, lwork ) + if( lwork>0 ) then + stdlib_iparam2stage = lwork + else + stdlib_iparam2stage = -1 + endif + else if ( ispec == 21 ) then + ! ispec = 21 for future use + stdlib_iparam2stage = nxi + endif + end function stdlib_iparam2stage + + !> ILAENV2STAGE: is called from the LAPACK routines to choose problem-dependent + !> parameters for the local environment. See ISPEC for a description of + !> the parameters. + !> It sets problem and machine dependent parameters useful for *_2STAGE and + !> related subroutines. + !> ILAENV2STAGE returns an INTEGER + !> if ILAENV2STAGE >= 0: ILAENV2STAGE returns the value of the parameter + !> specified by ISPEC + !> if ILAENV2STAGE < 0: if ILAENV2STAGE = -k, the k-th argument had an + !> illegal value. + !> This version provides a set of parameters which should give good, + !> but not optimal, performance on many of the currently available + !> computers for the 2-stage solvers. Users are encouraged to modify this + !> subroutine to set the tuning parameters for their particular machine using + !> the option and problem size information in the arguments. + !> This routine will not function correctly if it is converted to all + !> lower case. Converting it to all upper case is allowed. + + pure integer(ilp) function stdlib_ilaenv2stage( ispec, name, opts, n1, n2, n3, n4 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! july 2017 + ! Scalar Arguments + character(len=*), intent(in) :: name, opts + integer(ilp), intent(in) :: ispec, n1, n2, n3, n4 + ! ===================================================================== + ! Local Scalars + integer(ilp) :: iispec + ! Executable Statements + go to ( 10, 10, 10, 10, 10 )ispec + ! invalid value for ispec + stdlib_ilaenv2stage = -1 + return + 10 continue + ! 2stage eigenvalues and svd or related subroutines. + iispec = 16 + ispec + stdlib_ilaenv2stage = stdlib_iparam2stage( iispec, name, opts,n1, n2, n3, n4 ) + return + end function stdlib_ilaenv2stage + + + +end module stdlib_linalg_lapack_aux diff --git a/src/stdlib_linalg_lapack_c.fypp b/src/stdlib_linalg_lapack_c.fypp new file mode 100644 index 000000000..39aaa2f84 --- /dev/null +++ b/src/stdlib_linalg_lapack_c.fypp @@ -0,0 +1,80743 @@ +#:include "common.fypp" +module stdlib_linalg_lapack_c + use stdlib_linalg_constants + use stdlib_linalg_blas + use stdlib_linalg_lapack_aux + use stdlib_linalg_lapack_s + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_cbbcsd + public :: stdlib_cbdsqr + public :: stdlib_cgbbrd + public :: stdlib_cgbcon + public :: stdlib_cgbequ + public :: stdlib_cgbequb + public :: stdlib_cgbrfs + public :: stdlib_cgbsv + public :: stdlib_cgbsvx + public :: stdlib_cgbtf2 + public :: stdlib_cgbtrf + public :: stdlib_cgbtrs + public :: stdlib_cgebak + public :: stdlib_cgebal + public :: stdlib_cgebd2 + public :: stdlib_cgebrd + public :: stdlib_cgecon + public :: stdlib_cgeequ + public :: stdlib_cgeequb + public :: stdlib_cgees + public :: stdlib_cgeesx + public :: stdlib_cgeev + public :: stdlib_cgeevx + public :: stdlib_cgehd2 + public :: stdlib_cgehrd + public :: stdlib_cgejsv + public :: stdlib_cgelq + public :: stdlib_cgelq2 + public :: stdlib_cgelqf + public :: stdlib_cgelqt + public :: stdlib_cgelqt3 + public :: stdlib_cgels + public :: stdlib_cgelsd + public :: stdlib_cgelss + public :: stdlib_cgelsy + public :: stdlib_cgemlq + public :: stdlib_cgemlqt + public :: stdlib_cgemqr + public :: stdlib_cgemqrt + public :: stdlib_cgeql2 + public :: stdlib_cgeqlf + public :: stdlib_cgeqp3 + public :: stdlib_cgeqr + public :: stdlib_cgeqr2 + public :: stdlib_cgeqr2p + public :: stdlib_cgeqrf + public :: stdlib_cgeqrfp + public :: stdlib_cgeqrt + public :: stdlib_cgeqrt2 + public :: stdlib_cgeqrt3 + public :: stdlib_cgerfs + public :: stdlib_cgerq2 + public :: stdlib_cgerqf + public :: stdlib_cgesc2 + public :: stdlib_cgesdd + public :: stdlib_cgesv + public :: stdlib_cgesvd + public :: stdlib_cgesvdq + public :: stdlib_cgesvj + public :: stdlib_cgesvx + public :: stdlib_cgetc2 + public :: stdlib_cgetf2 + public :: stdlib_cgetrf + public :: stdlib_cgetrf2 + public :: stdlib_cgetri + public :: stdlib_cgetrs + public :: stdlib_cgetsls + public :: stdlib_cgetsqrhrt + public :: stdlib_cggbak + public :: stdlib_cggbal + public :: stdlib_cgges + public :: stdlib_cgges3 + public :: stdlib_cggesx + public :: stdlib_cggev + public :: stdlib_cggev3 + public :: stdlib_cggevx + public :: stdlib_cggglm + public :: stdlib_cgghd3 + public :: stdlib_cgghrd + public :: stdlib_cgglse + public :: stdlib_cggqrf + public :: stdlib_cggrqf + public :: stdlib_cgsvj0 + public :: stdlib_cgsvj1 + public :: stdlib_cgtcon + public :: stdlib_cgtrfs + public :: stdlib_cgtsv + public :: stdlib_cgtsvx + public :: stdlib_cgttrf + public :: stdlib_cgttrs + public :: stdlib_cgtts2 + public :: stdlib_chb2st_kernels + public :: stdlib_chbev + public :: stdlib_chbevd + public :: stdlib_chbevx + public :: stdlib_chbgst + public :: stdlib_chbgv + public :: stdlib_chbgvd + public :: stdlib_chbgvx + public :: stdlib_chbtrd + public :: stdlib_checon + public :: stdlib_checon_rook + public :: stdlib_cheequb + public :: stdlib_cheev + public :: stdlib_cheevd + public :: stdlib_cheevr + public :: stdlib_cheevx + public :: stdlib_chegs2 + public :: stdlib_chegst + public :: stdlib_chegv + public :: stdlib_chegvd + public :: stdlib_chegvx + public :: stdlib_cherfs + public :: stdlib_chesv + public :: stdlib_chesv_aa + public :: stdlib_chesv_rk + public :: stdlib_chesv_rook + public :: stdlib_chesvx + public :: stdlib_cheswapr + public :: stdlib_chetd2 + public :: stdlib_chetf2 + public :: stdlib_chetf2_rk + public :: stdlib_chetf2_rook + public :: stdlib_chetrd + public :: stdlib_chetrd_hb2st + public :: stdlib_chetrd_he2hb + public :: stdlib_chetrf + public :: stdlib_chetrf_aa + public :: stdlib_chetrf_rk + public :: stdlib_chetrf_rook + public :: stdlib_chetri + public :: stdlib_chetri_rook + public :: stdlib_chetrs + public :: stdlib_chetrs2 + public :: stdlib_chetrs_3 + public :: stdlib_chetrs_aa + public :: stdlib_chetrs_rook + public :: stdlib_chfrk + public :: stdlib_chgeqz + public :: stdlib_chpcon + public :: stdlib_chpev + public :: stdlib_chpevd + public :: stdlib_chpevx + public :: stdlib_chpgst + public :: stdlib_chpgv + public :: stdlib_chpgvd + public :: stdlib_chpgvx + public :: stdlib_chprfs + public :: stdlib_chpsv + public :: stdlib_chpsvx + public :: stdlib_chptrd + public :: stdlib_chptrf + public :: stdlib_chptri + public :: stdlib_chptrs + public :: stdlib_chsein + public :: stdlib_chseqr + public :: stdlib_cla_gbamv + public :: stdlib_cla_gbrcond_c + public :: stdlib_cla_gbrpvgrw + public :: stdlib_cla_geamv + public :: stdlib_cla_gercond_c + public :: stdlib_cla_gerpvgrw + public :: stdlib_cla_heamv + public :: stdlib_cla_hercond_c + public :: stdlib_cla_herpvgrw + public :: stdlib_cla_lin_berr + public :: stdlib_cla_porcond_c + public :: stdlib_cla_porpvgrw + public :: stdlib_cla_syamv + public :: stdlib_cla_syrcond_c + public :: stdlib_cla_syrpvgrw + public :: stdlib_cla_wwaddw + public :: stdlib_clabrd + public :: stdlib_clacgv + public :: stdlib_clacn2 + public :: stdlib_clacon + public :: stdlib_clacp2 + public :: stdlib_clacpy + public :: stdlib_clacrm + public :: stdlib_clacrt + public :: stdlib_cladiv + public :: stdlib_claed0 + public :: stdlib_claed7 + public :: stdlib_claed8 + public :: stdlib_claein + public :: stdlib_claesy + public :: stdlib_claev2 + public :: stdlib_clag2z + public :: stdlib_clags2 + public :: stdlib_clagtm + public :: stdlib_clahef + public :: stdlib_clahef_aa + public :: stdlib_clahef_rk + public :: stdlib_clahef_rook + public :: stdlib_clahqr + public :: stdlib_clahr2 + public :: stdlib_claic1 + public :: stdlib_clals0 + public :: stdlib_clalsa + public :: stdlib_clalsd + public :: stdlib_clamswlq + public :: stdlib_clamtsqr + public :: stdlib_clangb + public :: stdlib_clange + public :: stdlib_clangt + public :: stdlib_clanhb + public :: stdlib_clanhe + public :: stdlib_clanhf + public :: stdlib_clanhp + public :: stdlib_clanhs + public :: stdlib_clanht + public :: stdlib_clansb + public :: stdlib_clansp + public :: stdlib_clansy + public :: stdlib_clantb + public :: stdlib_clantp + public :: stdlib_clantr + public :: stdlib_clapll + public :: stdlib_clapmr + public :: stdlib_clapmt + public :: stdlib_claqgb + public :: stdlib_claqge + public :: stdlib_claqhb + public :: stdlib_claqhe + public :: stdlib_claqhp + public :: stdlib_claqp2 + public :: stdlib_claqps + public :: stdlib_claqr0 + public :: stdlib_claqr1 + public :: stdlib_claqr2 + public :: stdlib_claqr3 + public :: stdlib_claqr4 + public :: stdlib_claqr5 + public :: stdlib_claqsb + public :: stdlib_claqsp + public :: stdlib_claqsy + public :: stdlib_claqz0 + public :: stdlib_claqz1 + public :: stdlib_claqz2 + public :: stdlib_claqz3 + public :: stdlib_clar1v + public :: stdlib_clar2v + public :: stdlib_clarcm + public :: stdlib_clarf + public :: stdlib_clarfb + public :: stdlib_clarfb_gett + public :: stdlib_clarfg + public :: stdlib_clarfgp + public :: stdlib_clarft + public :: stdlib_clarfx + public :: stdlib_clarfy + public :: stdlib_clargv + public :: stdlib_clarnv + public :: stdlib_clarrv + public :: stdlib_clartg + public :: stdlib_clartv + public :: stdlib_clarz + public :: stdlib_clarzb + public :: stdlib_clarzt + public :: stdlib_clascl + public :: stdlib_claset + public :: stdlib_clasr + public :: stdlib_classq + public :: stdlib_claswlq + public :: stdlib_claswp + public :: stdlib_clasyf + public :: stdlib_clasyf_aa + public :: stdlib_clasyf_rk + public :: stdlib_clasyf_rook + public :: stdlib_clatbs + public :: stdlib_clatdf + public :: stdlib_clatps + public :: stdlib_clatrd + public :: stdlib_clatrs + public :: stdlib_clatrz + public :: stdlib_clatsqr + public :: stdlib_claunhr_col_getrfnp + public :: stdlib_claunhr_col_getrfnp2 + public :: stdlib_clauu2 + public :: stdlib_clauum + public :: stdlib_cpbcon + public :: stdlib_cpbequ + public :: stdlib_cpbrfs + public :: stdlib_cpbstf + public :: stdlib_cpbsv + public :: stdlib_cpbsvx + public :: stdlib_cpbtf2 + public :: stdlib_cpbtrf + public :: stdlib_cpbtrs + public :: stdlib_cpftrf + public :: stdlib_cpftri + public :: stdlib_cpftrs + public :: stdlib_cpocon + public :: stdlib_cpoequ + public :: stdlib_cpoequb + public :: stdlib_cporfs + public :: stdlib_cposv + public :: stdlib_cposvx + public :: stdlib_cpotf2 + public :: stdlib_cpotrf + public :: stdlib_cpotrf2 + public :: stdlib_cpotri + public :: stdlib_cpotrs + public :: stdlib_cppcon + public :: stdlib_cppequ + public :: stdlib_cpprfs + public :: stdlib_cppsv + public :: stdlib_cppsvx + public :: stdlib_cpptrf + public :: stdlib_cpptri + public :: stdlib_cpptrs + public :: stdlib_cpstf2 + public :: stdlib_cpstrf + public :: stdlib_cptcon + public :: stdlib_cpteqr + public :: stdlib_cptrfs + public :: stdlib_cptsv + public :: stdlib_cptsvx + public :: stdlib_cpttrf + public :: stdlib_cpttrs + public :: stdlib_cptts2 + public :: stdlib_crot + public :: stdlib_cspcon + public :: stdlib_cspmv + public :: stdlib_cspr + public :: stdlib_csprfs + public :: stdlib_cspsv + public :: stdlib_cspsvx + public :: stdlib_csptrf + public :: stdlib_csptri + public :: stdlib_csptrs + public :: stdlib_csrscl + public :: stdlib_cstedc + public :: stdlib_cstegr + public :: stdlib_cstein + public :: stdlib_cstemr + public :: stdlib_csteqr + public :: stdlib_csycon + public :: stdlib_csycon_rook + public :: stdlib_csyconv + public :: stdlib_csyconvf + public :: stdlib_csyconvf_rook + public :: stdlib_csyequb + public :: stdlib_csymv + public :: stdlib_csyr + public :: stdlib_csyrfs + public :: stdlib_csysv + public :: stdlib_csysv_aa + public :: stdlib_csysv_rk + public :: stdlib_csysv_rook + public :: stdlib_csysvx + public :: stdlib_csyswapr + public :: stdlib_csytf2 + public :: stdlib_csytf2_rk + public :: stdlib_csytf2_rook + public :: stdlib_csytrf + public :: stdlib_csytrf_aa + public :: stdlib_csytrf_rk + public :: stdlib_csytrf_rook + public :: stdlib_csytri + public :: stdlib_csytri_rook + public :: stdlib_csytrs + public :: stdlib_csytrs2 + public :: stdlib_csytrs_3 + public :: stdlib_csytrs_aa + public :: stdlib_csytrs_rook + public :: stdlib_ctbcon + public :: stdlib_ctbrfs + public :: stdlib_ctbtrs + public :: stdlib_ctfsm + public :: stdlib_ctftri + public :: stdlib_ctfttp + public :: stdlib_ctfttr + public :: stdlib_ctgevc + public :: stdlib_ctgex2 + public :: stdlib_ctgexc + public :: stdlib_ctgsen + public :: stdlib_ctgsja + public :: stdlib_ctgsna + public :: stdlib_ctgsy2 + public :: stdlib_ctgsyl + public :: stdlib_ctpcon + public :: stdlib_ctplqt + public :: stdlib_ctplqt2 + public :: stdlib_ctpmlqt + public :: stdlib_ctpmqrt + public :: stdlib_ctpqrt + public :: stdlib_ctpqrt2 + public :: stdlib_ctprfb + public :: stdlib_ctprfs + public :: stdlib_ctptri + public :: stdlib_ctptrs + public :: stdlib_ctpttf + public :: stdlib_ctpttr + public :: stdlib_ctrcon + public :: stdlib_ctrevc + public :: stdlib_ctrevc3 + public :: stdlib_ctrexc + public :: stdlib_ctrrfs + public :: stdlib_ctrsen + public :: stdlib_ctrsna + public :: stdlib_ctrsyl + public :: stdlib_ctrti2 + public :: stdlib_ctrtri + public :: stdlib_ctrtrs + public :: stdlib_ctrttf + public :: stdlib_ctrttp + public :: stdlib_ctzrzf + public :: stdlib_cunbdb + public :: stdlib_cunbdb1 + public :: stdlib_cunbdb2 + public :: stdlib_cunbdb3 + public :: stdlib_cunbdb4 + public :: stdlib_cunbdb5 + public :: stdlib_cunbdb6 + public :: stdlib_cuncsd + public :: stdlib_cuncsd2by1 + public :: stdlib_cung2l + public :: stdlib_cung2r + public :: stdlib_cungbr + public :: stdlib_cunghr + public :: stdlib_cungl2 + public :: stdlib_cunglq + public :: stdlib_cungql + public :: stdlib_cungqr + public :: stdlib_cungr2 + public :: stdlib_cungrq + public :: stdlib_cungtr + public :: stdlib_cungtsqr + public :: stdlib_cungtsqr_row + public :: stdlib_cunhr_col + public :: stdlib_cunm22 + public :: stdlib_cunm2l + public :: stdlib_cunm2r + public :: stdlib_cunmbr + public :: stdlib_cunmhr + public :: stdlib_cunml2 + public :: stdlib_cunmlq + public :: stdlib_cunmql + public :: stdlib_cunmqr + public :: stdlib_cunmr2 + public :: stdlib_cunmr3 + public :: stdlib_cunmrq + public :: stdlib_cunmrz + public :: stdlib_cunmtr + public :: stdlib_cupgtr + public :: stdlib_cupmtr + + ! 32-bit real constants + real(sp), parameter, private :: negone = -1.00_sp + real(sp), parameter, private :: zero = 0.00_sp + real(sp), parameter, private :: half = 0.50_sp + real(sp), parameter, private :: one = 1.00_sp + real(sp), parameter, private :: two = 2.00_sp + real(sp), parameter, private :: three = 3.00_sp + real(sp), parameter, private :: four = 4.00_sp + real(sp), parameter, private :: eight = 8.00_sp + real(sp), parameter, private :: ten = 10.00_sp + + ! 32-bit complex constants + complex(sp), parameter, private :: czero = ( 0.0_sp,0.0_sp) + complex(sp), parameter, private :: chalf = ( 0.5_sp,0.0_sp) + complex(sp), parameter, private :: cone = ( 1.0_sp,0.0_sp) + complex(sp), parameter, private :: cnegone = (-1.0_sp,0.0_sp) + + ! 32-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(sp), parameter, private :: rradix = real(radix(zero),sp) + real(sp), parameter, private :: ulp = epsilon(zero) + real(sp), parameter, private :: eps = ulp*half + real(sp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(sp), parameter, private :: safmax = one/safmin + real(sp), parameter, private :: smlnum = safmin/ulp + real(sp), parameter, private :: bignum = safmax*ulp + real(sp), parameter, private :: rtmin = sqrt(smlnum) + real(sp), parameter, private :: rtmax = sqrt(bignum) + + ! 32-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(sp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(sp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(sp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(sp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> CGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_cgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(out) :: c(*), r(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: bignum, rcmax, rcmin, smlnum + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab CGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from CGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_cgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(out) :: c(*), r(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), cabs1( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_cgbequb + + !> CGBTF2: computes an LU factorization of a complex m-by-n band matrix + !> A using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_cgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_cscal( km, cone / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_cgeru( km, ju-j, -cone, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_cgbtf2 + + !> CGEBAK: forms the right or left eigenvectors of a complex general + !> matrix by backward transformation on the computed eigenvectors of the + !> balanced matrix output by CGEBAL. + + pure subroutine stdlib_cgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: scale(*) + complex(sp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(sp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i CGEBAL: balances a general complex matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_cgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(out) :: scale(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sclfac = 2.0e+0_sp + real(sp), parameter :: factor = 0.95e+0_sp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(sp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + if( stdlib_sisnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'CGEBAL', -info ) + return + end if + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_csscal( n-k+1, g, a( i, k ), lda ) + call stdlib_csscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_cgebal + + !> CGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_cgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(out) :: c(*), r(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, rcmax, rcmin, smlnum + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from CGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_cgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(out) :: c(*), r(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log(r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), cabs1( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_cgeequb + + !> CGETC2: computes an LU factorization, using complete pivoting, of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is a level 1 BLAS version of the algorithm. + + pure subroutine stdlib_cgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(sp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_cswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_cswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) CGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_cgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_cscal( m-j, cone / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j CGGBAK: forms the right or left eigenvectors of a complex generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> CGGBAL. + + pure subroutine stdlib_cggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: lscale(*), rscale(*) + complex(sp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv CGGBAL: balances a pair of general complex matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_cggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(sp), intent(out) :: lscale(*), rscale(*), work(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sclfac = 1.0e+1_sp + + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(sp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,aimag,int,log10,max,min,real,sign + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax CGTSV: solves the equation + !> A*X = B, + !> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T *X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_cgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k + complex(sp) :: mult, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=cabs1( dl( k ) ) ) then + ! no row interchange required + mult = dl( k ) / d( k ) + d( k+1 ) = d( k+1 ) - mult*du( k ) + do j = 1, nrhs + b( k+1, j ) = b( k+1, j ) - mult*b( k, j ) + end do + if( k<( n-1 ) )dl( k ) = czero + else + ! interchange rows k and k+1 + mult = d( k ) / dl( k ) + d( k ) = dl( k ) + temp = d( k+1 ) + d( k+1 ) = du( k ) - mult*temp + if( k<( n-1 ) ) then + dl( k ) = du( k+1 ) + du( k+1 ) = -mult*dl( k ) + end if + du( k ) = temp + do j = 1, nrhs + temp = b( k, j ) + b( k, j ) = b( k+1, j ) + b( k+1, j ) = temp - mult*b( k+1, j ) + end do + end if + end do loop_30 + if( d( n )==czero ) then + info = n + return + end if + ! back solve with the matrix u from the factorization. + do j = 1, nrhs + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do k = n - 2, 1, -1 + b( k, j ) = ( b( k, j )-du( k )*b( k+1, j )-dl( k )*b( k+2, j ) ) / d( k ) + + end do + end do + return + end subroutine stdlib_cgtsv + + !> CGTTRF: computes an LU factorization of a complex tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_cgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: d(*), dl(*), du(*) + complex(sp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: fact, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'CGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( cabs1( d( i ) )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_cgttrf + + !> CGTTS2: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by CGTTRF. + + pure subroutine stdlib_cgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + complex(sp) :: temp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + if( ipiv( i )==i ) then + b( i+1, j ) = b( i+1, j ) - dl( i )*b( i, j ) + else + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - dl( i )*b( i, j ) + end if + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else if( itrans==1 ) then + ! solve a**t * x = b. + if( nrhs<=1 ) then + j = 1 + 70 continue + ! solve u**t * x = b. + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + else + ! solve a**h * x = b. + if( nrhs<=1 ) then + j = 1 + 130 continue + ! solve u**h * x = b. + b( 1, j ) = b( 1, j ) / conjg( d( 1 ) ) + if( n>1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) ) /conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )*b( & + i-2, j ) ) /conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) ) /conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )& + *b( i-2, j ) ) / conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_cgtts2 + + !> CHESWAPR: applies an elementary permutation on the rows and the columns of + !> a hermitian matrix. + + pure subroutine stdlib_cheswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(sp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_cswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=conjg(a(i1+i,i2)) + a(i1+i,i2)=conjg(tmp) + end do + a(i1,i2)=conjg(a(i1,i2)) + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from 1 to i1-1 + call stdlib_cswap ( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=conjg(a(i2,i1+i)) + a(i2,i1+i)=conjg(tmp) + end do + a(i2,i1)=conjg(a(i2,i1)) + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_cheswapr + + !> CHETF2: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_chetf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(sp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) .or. stdlib_sisnan(absakk) ) then + ! column k is or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=sp) + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = imax + stdlib_icamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=sp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + a( kp, kk ) = conjg( a( kp, kk ) ) + r1 = real( a( kk, kk ),KIND=sp) + a( kk, kk ) = real( a( kp, kp ),KIND=sp) + a( kp, kp ) = r1 + if( kstep==2 ) then + a( k, k ) = real( a( k, k ),KIND=sp) + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + a( k, k ) = real( a( k, k ),KIND=sp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=sp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( a( k, k ),KIND=sp) + call stdlib_cher( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_csscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_slapy2( real( a( k-1, k ),KIND=sp),aimag( a( k-1, k ) ) ) + + d22 = real( a( k-1, k-1 ),KIND=sp) / d + d11 = real( a( k, k ),KIND=sp) / d + tt = one / ( d11*d22-one ) + d12 = a( k-1, k ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = d*( d22*a( j, k )-d12*a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*conjg( wk ) -a( i, k-1 )*conjg( & + wkm1 ) + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + a( j, j ) = cmplx( real( a( j, j ),KIND=sp), zero,KIND=sp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 50 continue + ! if k > n, exit from loop + if( k>n )go to 90 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_icamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=sp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp CHETF2_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_chetf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + ! ====================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, stemp, rowmax, tt, sfmin + complex(sp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=sp) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + stemp = cabs1( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=sp) )1 )call stdlib_cswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=sp) + a( k, k ) = real( a( p, p ),KIND=sp) + a( p, p ) = r1 + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=sp) + a( kk, kk ) = real( a( kp, kp ),KIND=sp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=sp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=sp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=sp) + call stdlib_cher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_csscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=sp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_cher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_slapy2( real( a( k-1, k ),KIND=sp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=sp) + d22 = real( a( k-1, k-1 ) / d,KIND=sp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=sp), zero,KIND=sp) + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**h using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=sp) )1 )call stdlib_cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the trailing submatrix a(k:n,k:n) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=sp) + if( kstep==2 )a( k+1, k+1 ) = real( a( k+1, k+1 ),KIND=sp) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of a now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=sp) + call stdlib_cher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_csscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=sp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_cher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k CHETF2_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_chetf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ====================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, stemp, rowmax, tt, sfmin + complex(sp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=sp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + stemp = cabs1( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=sp) )1 )call stdlib_cswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=sp) + a( k, k ) = real( a( p, p ),KIND=sp) + a( p, p ) = r1 + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the leading submatrix a(1:k,1:k) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp>1 )call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=sp) + a( kk, kk ) = real( a( kp, kp ),KIND=sp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=sp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=sp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=sp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=sp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=sp) + call stdlib_cher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_csscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=sp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_cher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_slapy2( real( a( k-1, k ),KIND=sp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=sp) + d22 = real( a( k-1, k-1 ) / d,KIND=sp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=sp), zero,KIND=sp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=sp) )=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=sp) + call stdlib_cher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_csscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=sp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_cher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k CHETRI: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> CHETRF. + + pure subroutine stdlib_chetri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(sp) :: ak, akp1, d, t + complex(sp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=sp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_cdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=sp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=sp) / t + akp1 = real( a( k+1, k+1 ),KIND=sp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_cdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=sp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_cdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_ccopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_cdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=sp) + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=sp) + ! compute column k of the inverse. + if( k CHETRI_ROOK: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> CHETRF_ROOK. + + pure subroutine stdlib_chetri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(sp) :: ak, akp1, d, t + complex(sp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 70 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=sp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_cdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=sp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=sp) / t + akp1 = real( a( k+1, k+1 ),KIND=sp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_cdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=sp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_cdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_ccopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_chemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_cdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=sp) + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k,1:k) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1) in the leading submatrix a(k+1:n,k+1:n) + ! (1) interchange rows and columns k and -ipiv(k) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + ! (2) interchange rows and columns k+1 and -ipiv(k+1) + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 70 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 80 continue + ! if k < 1, exit from loop. + if( k<1 )go to 120 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=sp) + ! compute column k of the inverse. + if( k CHETRS_3: solves a system of linear equations A * X = B with a complex + !> Hermitian matrix A using the factorization computed + !> by CHETRF_RK or CHETRF_BK: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_chetrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), e(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(sp) :: s + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_ctrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=sp) / real( a( i, i ),KIND=sp) + call stdlib_csscal( nrhs, s, b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'U', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=sp) / real( a( i, i ),KIND=sp) + call stdlib_csscal( nrhs, s, b( i, 1 ), ldb ) + else if( i b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_ctrsm('L', 'L', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_chetrs_3 + + !> Level 3 BLAS like routine for C in RFP Format. + !> CHFRK: performs one of the Hermitian rank--k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n Hermitian + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + + pure subroutine stdlib_chfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, n + character, intent(in) :: trans, transr, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: c(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, normaltransr, nisodd, notrans + integer(ilp) :: info, nrowa, j, nk, n1, n2 + complex(sp) :: calpha, cbeta + ! Intrinsic Functions + intrinsic :: max,cmplx + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( notrans ) then + nrowa = n + else + nrowa = k + end if + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( lda CHPGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by CPPTRF. + + pure subroutine stdlib_chpgst( itype, uplo, n, ap, bp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, n + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: bp(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk + real(sp) :: ajj, akk, bjj, bkk + complex(sp) :: ct + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CHPGST', -info ) + return + end if + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + ! j1 and jj are the indices of a(1,j) and a(j,j) + jj = 0 + do j = 1, n + j1 = jj + 1 + jj = jj + j + ! compute the j-th column of the upper triangle of a + ap( jj ) = real( ap( jj ),KIND=sp) + bjj = real( bp( jj ),KIND=sp) + call stdlib_ctpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1 & + ) + call stdlib_chpmv( uplo, j-1, -cone, ap, bp( j1 ), 1, cone,ap( j1 ), 1 ) + + call stdlib_csscal( j-1, one / bjj, ap( j1 ), 1 ) + ap( jj ) = ( ap( jj )-stdlib_cdotc( j-1, ap( j1 ), 1, bp( j1 ),1 ) ) / & + bjj + end do + else + ! compute inv(l)*a*inv(l**h) + ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1) + kk = 1 + do k = 1, n + k1k1 = kk + n - k + 1 + ! update the lower triangle of a(k:n,k:n) + akk = real( ap( kk ),KIND=sp) + bkk = real( bp( kk ),KIND=sp) + akk = akk / bkk**2 + ap( kk ) = akk + if( k CHPTRF: computes the factorization of a complex Hermitian packed + !> matrix A using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_chptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(sp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CHPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**h using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc+k-1 ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_icamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=sp) + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_icamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc+imax-1 ),KIND=sp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_cswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = conjg( ap( knc+j-1 ) ) + ap( knc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = t + end do + ap( kx+kk-1 ) = conjg( ap( kx+kk-1 ) ) + r1 = real( ap( knc+kk-1 ),KIND=sp) + ap( knc+kk-1 ) = real( ap( kpc+kp-1 ),KIND=sp) + ap( kpc+kp-1 ) = r1 + if( kstep==2 ) then + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=sp) + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + else + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=sp) + if( kstep==2 )ap( kc-1 ) = real( ap( kc-1 ),KIND=sp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( ap( kc+k-1 ),KIND=sp) + call stdlib_chpr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_csscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_slapy2( real( ap( k-1+( k-1 )*k / 2 ),KIND=sp),aimag( ap( k-1+( & + k-1 )*k / 2 ) ) ) + d22 = real( ap( k-1+( k-2 )*( k-1 ) / 2 ),KIND=sp) / d + d11 = real( ap( k+( k-1 )*k / 2 ),KIND=sp) / d + tt = one / ( d11*d22-one ) + d12 = ap( k-1+( k-1 )*k / 2 ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-conjg( d12 )*ap( j+( k-1 )*k & + / 2 ) ) + wk = d*( d22*ap( j+( k-1 )*k / 2 )-d12*ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *conjg( wk ) -ap( i+( k-2 )*( k-1 ) / 2 )*conjg( wkm1 ) + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + ap( j+( j-1 )*j / 2 ) = cmplx( real( ap( j+( j-1 )*j / 2 ),KIND=sp), & + zero,KIND=sp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc ),KIND=sp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp CHPTRI: computes the inverse of a complex Hermitian indefinite matrix + !> A in packed storage using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHPTRF. + + pure subroutine stdlib_chptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + real(sp) :: ak, akp1, d, t + complex(sp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CHPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = one / real( ap( kc+k-1 ),KIND=sp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_chpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_cdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=sp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( ap( kcnext+k-1 ) ) + ak = real( ap( kc+k-1 ),KIND=sp) / t + akp1 = real( ap( kcnext+k ),KIND=sp) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-one ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_chpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_cdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=sp) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_cdotc( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_ccopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_chpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -real( stdlib_cdotc( k-1, work, 1, ap( kcnext & + ),1 ),KIND=sp) + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_cswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = conjg( ap( kc+j-1 ) ) + ap( kc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = temp + end do + ap( kc+kp-1 ) = conjg( ap( kc+kp-1 ) ) + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = one / real( ap( kc ),KIND=sp) + ! compute column k of the inverse. + if( k CLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_cla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + complex(sp), intent(in) :: ab(ldab,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_cla_gbamv + + !> CLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(sp) function stdlib_cla_gbrpvgrw( n, kl, ku, ncols, ab, ldab, afb,ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + complex(sp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: amax, umax, rpvgrw + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( cabs1( ab( kd+i-j, j ) ), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( cabs1( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_cla_gbrpvgrw = rpvgrw + end function stdlib_cla_gbrpvgrw + + !> CLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_cla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + integer(ilp), intent(in) :: trans + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == 0.0_sp ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == 0.0_sp ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= 0.0_sp ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_cla_geamv + + !> CLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(sp) function stdlib_cla_gerpvgrw( n, ncols, a, lda, af, ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: amax, umax, rpvgrw + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: max,min,abs,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( cabs1( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( cabs1( af( i, j ) ), umax ) + end do + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_cla_gerpvgrw = rpvgrw + end function stdlib_cla_gerpvgrw + + !> CLA_SYAMV performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_cla_heamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_cla_heamv + + !> CLA_LIN_BERR: computes componentwise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the componentwise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_cla_lin_berr( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(sp), intent(in) :: ayb(n,nrhs) + real(sp), intent(out) :: berr(nrhs) + complex(sp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(sp) :: tmp,safe1 + integer(ilp) :: i, j + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + complex(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the cla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= 0.0_sp) then + tmp = (safe1 + cabs1(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_sp (and if computed by cla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_cla_lin_berr + + !> CLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(sp) function stdlib_cla_porpvgrw( uplo, ncols, a, lda, af, ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: amax, umax, rpvgrw + logical(lk) :: upper + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_spotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_cla_porpvgrw = rpvgrw + end function stdlib_cla_porpvgrw + + !> CLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_cla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + integer(ilp), intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_cla_syamv + + !> CLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_cla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: x(*), y(*) + complex(sp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + complex(sp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_cla_wwaddw + + !> CLACGV: conjugates a complex vector of length N. + + pure subroutine stdlib_clacgv( n, x, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ioff + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( incx==1 ) then + do i = 1, n + x( i ) = conjg( x( i ) ) + end do + else + ioff = 1 + if( incx<0 )ioff = 1 - ( n-1 )*incx + do i = 1, n + x( ioff ) = conjg( x( ioff ) ) + ioff = ioff + incx + end do + end if + return + end subroutine stdlib_clacgv + + !> CLACN2: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_clacn2( n, v, x, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(inout) :: isave(3) + complex(sp), intent(out) :: v(*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, jlast + real(sp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,real + ! Executable Statements + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=sp),KIND=sp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_scsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=sp) / absxi,aimag( x( i ) ) / absxi,KIND=sp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + isave( 2 ) = stdlib_icmax1( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( isave( 2 ) ) = cone + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_ccopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_scsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=sp) / absxi,aimag( x( i ) ) / absxi,KIND=sp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_icmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_ccopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_clacn2 + + !> CLACON: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_clacon( n, v, x, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est + ! Array Arguments + complex(sp), intent(out) :: v(n) + complex(sp), intent(inout) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(sp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,real + ! Save Statement + save + ! Executable Statements + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=sp),KIND=sp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_scsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=sp) / absxi,aimag( x( i ) ) / absxi,KIND=sp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + j = stdlib_icmax1( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( j ) = cone + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_ccopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_scsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=sp) / absxi,aimag( x( i ) ) / absxi,KIND=sp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = j + j = stdlib_icmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( j ) ) ) .and.( iterest ) then + call stdlib_ccopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_clacon + + !> CLACP2: copies all or part of a real two-dimensional matrix A to a + !> complex matrix B. + + pure subroutine stdlib_clacp2( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_clacp2 + + !> CLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_clacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_clacpy + + !> CLACRM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by N and complex; B is N by N and real; + !> C is M by N and complex. + + pure subroutine stdlib_clacrm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(sp), intent(in) :: b(ldb,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: aimag,cmplx,real + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( a( i, j ),KIND=sp) + end do + end do + l = m*n + 1 + call stdlib_sgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( a( i, j ) ) + end do + end do + call stdlib_sgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=sp),rwork( l+( j-1 )*m+i-1 ),KIND=sp) + + end do + end do + return + end subroutine stdlib_clacrm + + !> CLACRT: performs the operation + !> ( c s )( x ) ==> ( x ) + !> ( -s c )( y ) ( y ) + !> where c and s are complex and the vectors x and y are complex. + + pure subroutine stdlib_clacrt( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + complex(sp), intent(in) :: c, s + ! Array Arguments + complex(sp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(sp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - s*cx( ix ) + cx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + ctemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - s*cx( i ) + cx( i ) = ctemp + end do + return + end subroutine stdlib_clacrt + + !> CLADIV: := X / Y, where X and Y are complex. The computation of X / Y + !> will not overflow on an intermediary step unless the results + !> overflows. + + pure complex(sp) function stdlib_cladiv( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: x, y + ! ===================================================================== + ! Local Scalars + real(sp) :: zi, zr + ! Intrinsic Functions + intrinsic :: aimag,cmplx,real + ! Executable Statements + call stdlib_sladiv( real( x,KIND=sp), aimag( x ), real( y,KIND=sp), aimag( y ), zr,zi ) + + stdlib_cladiv = cmplx( zr, zi,KIND=sp) + return + end function stdlib_cladiv + + !> CLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_claed8( k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda,q2, ldq2, w, & + indxp, indx, indxq, perm, givptr,givcol, givnum, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(inout) :: d(*), z(*) + real(sp), intent(out) :: dlamda(*), givnum(2,*), w(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: q2(ldq2,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: mone = -1.0_sp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(sp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( qsizn ) then + info = -8 + else if( ldq2n )go to 90 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_slapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + call stdlib_csrot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j ) & + ) ), 1, c, s ) + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 80 continue + if( k2+i<=n ) then + if( d( jlam ) CLAESY: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> ( ( A, B );( B, C ) ) + !> provided the norm of the matrix of eigenvectors is larger than + !> some threshold value. + !> RT1 is the eigenvalue of larger absolute value, and RT2 of + !> smaller absolute value. If the eigenvectors are computed, then + !> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence + !> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] + !> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] + + pure subroutine stdlib_claesy( a, b, c, rt1, rt2, evscal, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: a, b, c + complex(sp), intent(out) :: cs1, evscal, rt1, rt2, sn1 + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1_sp + + + + + + ! Local Scalars + real(sp) :: babs, evnorm, tabs, z + complex(sp) :: s, t, tmp + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! special case: the matrix is actually diagonal. + ! to avoid divide by zero later, we treat this case separately. + if( abs( b )==zero ) then + rt1 = a + rt2 = c + if( abs( rt1 )zero )t = z*sqrt( ( t / z )**2+( b / z )**2 ) + ! compute the two eigenvalues. rt1 and rt2 are exchanged + ! if necessary so that rt1 will have the greater magnitude. + rt1 = s + t + rt2 = s - t + if( abs( rt1 )one ) then + t = tabs*sqrt( ( one / tabs )**2+( sn1 / tabs )**2 ) + else + t = sqrt( cone+sn1*sn1 ) + end if + evnorm = abs( t ) + if( evnorm>=thresh ) then + evscal = cone / t + cs1 = evscal + sn1 = sn1*evscal + else + evscal = zero + end if + end if + return + end subroutine stdlib_claesy + + !> CLAEV2: computes the eigendecomposition of a 2-by-2 Hermitian matrix + !> [ A B ] + !> [ CONJG(B) C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_claev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(out) :: cs1, rt1, rt2 + complex(sp), intent(in) :: a, b, c + complex(sp), intent(out) :: sn1 + ! ===================================================================== + + + ! Local Scalars + real(sp) :: t + complex(sp) :: w + ! Intrinsic Functions + intrinsic :: abs,conjg,real + ! Executable Statements + if( abs( b )==zero ) then + w = one + else + w = conjg( b ) / abs( b ) + end if + call stdlib_slaev2( real( a,KIND=sp), abs( b ), real( c,KIND=sp), rt1, rt2, cs1, t ) + + sn1 = w*t + return + end subroutine stdlib_claev2 + + !> CLAG2Z: converts a COMPLEX matrix, SA, to a COMPLEX*16 matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_clag2z( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + complex(sp), intent(in) :: sa(ldsa,*) + complex(dp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_clag2z + + !> CLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_clagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(in) :: alpha, beta + ! Array Arguments + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b + a**t * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b + a**h * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) +conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) + conjg( du( n-1 ) )*x( n-1, j ) + conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + conjg( du( i-1 ) )*x( i-1, j ) + conjg( d( i ) )& + *x( i, j ) + conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b - a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b - a**h*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) -conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) - conjg( du( n-1 ) )*x( n-1, j ) - conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - conjg( du( i-1 ) )*x( i-1, j ) - conjg( d( i ) )& + *x( i, j ) - conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_clagtm + + !> CLAHEF: computes a partial factorization of a complex Hermitian + !> matrix A using the Bunch-Kaufman diagonal pivoting method. The + !> partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_clahef( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(sp) :: absakk, alpha, colmax, r1, rowmax, t + complex(sp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=sp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column kw-1 of w and update it + call stdlib_ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + w( imax, kw-1 ) = real( a( imax, imax ),KIND=sp) + call stdlib_ccopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_clacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + jmax = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + ! case(2) + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, kw-1 ),KIND=sp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + ! case(4) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=sp) + call stdlib_ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + call stdlib_clacgv( kk-1-kp, a( kp, kp+1 ), lda ) + if( kp>1 )call stdlib_ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(4)) + r1 = one / real( a( k, k ),KIND=sp) + call stdlib_csscal( k-1, r1, a( 1, k ), 1 ) + ! (2) conjugate column w(kw) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( conj(d21)*( d11 ) d21*( -1 ) ) + ! ( ( -1 ) ( d22 ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = t/d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0, since in 2x2 pivot case(4) + ! |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=sp)-one ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = conjg( d21 )*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + call stdlib_clacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + end do + ! update the rectangular superdiagonal block + call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in of rows in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows j and jp + ! at each step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_cswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 (note that conjg(w) is actually stored) + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + ! copy column k of a to column k of w and update it + w( k, k ) = real( a( k, k ),KIND=sp) + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column k+1 of w and update it + call stdlib_ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_clacgv( imax-k, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( a( imax, imax ),KIND=sp) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, k+1 ),KIND=sp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + ! case(4) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=sp) + call stdlib_ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_clacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=sp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_cswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_clahef + + !> CLAHEF_RK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> CLAHEF_RK is an auxiliary routine called by CHETRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_clahef_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*), e(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, k, kk, kkw, kp, kstep, kw, p + real(sp) :: absakk, alpha, colmax, stemp, r1, rowmax, t, sfmin + complex(sp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,conjg,aimag,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=sp) + if( k1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=sp) + if( k>1 )call stdlib_ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=sp) + call stdlib_ccopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_clacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = cabs1( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=sp) )1 )call stdlib_ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=sp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_csscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=sp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + call stdlib_clacgv( k-2, w( 1, kw-1 ), 1 ) + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 (note that conjg(w) is actually stored) + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=sp) + if( k1 ) then + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=sp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=sp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_icamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=sp) )1 )call stdlib_cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=sp) + call stdlib_ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_clacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=sp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_csscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_clacgv( n-k, w( k+1, k ), 1 ) + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k CLAHEF_ROOK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting + !> method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_clahef_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, jp1, jp2, k, kk, kkw, kp, kstep, kw, & + p + real(sp) :: absakk, alpha, colmax, stemp, r1, rowmax, t, sfmin + complex(sp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,conjg,aimag,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=sp) + if( k1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=sp) + if( k>1 )call stdlib_ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=sp) + call stdlib_ccopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_clacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = cabs1( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=sp) )1 )call stdlib_ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=sp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_csscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=sp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_clacgv( k-1, w( 1, kw ), 1 ) + call stdlib_clacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=sp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in of rows in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows j and jp2 + ! (or j and jp2, and j+1 and jp1) at each step j + kstep = 1 + jp1 = 1 + ! (here, j is a diagonal index) + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + ! (here, j is a diagonal index) + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_cswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = jj + 1 + if( kstep==2 .and. jp1/=jj .and. j<=n )call stdlib_cswap( n-j+1, a( jp1, j ), & + lda, a( jj, j ), lda ) + if( j=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=sp) + if( k1 ) then + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=sp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=sp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=sp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_icamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=sp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=sp) )1 )call stdlib_cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=sp) + call stdlib_ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_clacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=sp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_csscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_clacgv( n-k, w( k+1, k ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k=1 )call stdlib_cswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = jj -1 + if( kstep==2 .and. jp1/=jj .and. j>=1 )call stdlib_cswap( j, a( jp1, 1 ), lda, a(& + jj, 1 ), lda ) + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_clahef_rook + + !> CLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then CLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**H gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**H and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] + !> [ conjg(gamma) ] + !> where alpha = x**H*w. + + pure subroutine stdlib_claic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(sp), intent(in) :: sest + real(sp), intent(out) :: sestpr + complex(sp), intent(out) :: c, s + complex(sp), intent(in) :: gamma + ! Array Arguments + complex(sp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(sp) :: absalp, absest, absgam, b, eps, norma, s1, s2, scl, t, test, tmp, zeta1, & + zeta2 + complex(sp) :: alpha, cosine, sine + ! Intrinsic Functions + intrinsic :: abs,conjg,max,sqrt + ! Executable Statements + eps = stdlib_slamch( 'EPSILON' ) + alpha = stdlib_cdotc( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=sp) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = s2*scl + s = ( alpha / s2 ) / scl + c = ( gamma / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = s1*scl + s = ( alpha / s1 ) / scl + c = ( gamma / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = real( c / ( b+sqrt( b*b+c ) ),KIND=sp) + else + t = real( sqrt( b*b+c ) - b,KIND=sp) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=sp) + + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -conjg( gamma ) + cosine = conjg( alpha ) + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=sp) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / scl ) + s = -( conjg( gamma ) / s2 ) / scl + c = ( conjg( alpha ) / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = absest / scl + s = -( conjg( gamma ) / s1 ) / scl + c = ( conjg( alpha ) / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + norma = max( one+zeta1*zeta1+zeta1*zeta2,zeta1*zeta2+zeta2*zeta2 ) + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = real( c / ( b+sqrt( abs( b*b-c ) ) ),KIND=sp) + sine = ( alpha / absest ) / ( one-t ) + cosine = -( gamma / absest ) / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = real( -c / ( b+sqrt( b*b+c ) ),KIND=sp) + else + t = real( b - sqrt( b*b+c ),KIND=sp) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=sp) + + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_claic1 + + !> CLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_clapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + complex(sp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_clapmr + + !> CLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_clapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, j, in + complex(sp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 100 + k( i ) = -k( i ) + j = k( i ) + 80 continue + if( j==i )go to 100 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 80 + 100 continue + end do + end if + return + end subroutine stdlib_clapmt + + !> CLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_claqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: c(*), r(*) + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_claqgb + + !> CLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_claqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: c(*), r(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_claqge + + !> CLAQHB: equilibrates an Hermitian band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_claqhb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(out) :: s(*) + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j - 1 + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + ab( kd+1, j ) = cj*cj*real( ab( kd+1, j ),KIND=sp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + ab( 1, j ) = cj*cj*real( ab( 1, j ),KIND=sp) + do i = j + 1, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqhb + + !> CLAQHE: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_claqhe( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + a( i, j ) = cj*s( i )*a( i, j ) + end do + a( j, j ) = cj*cj*real( a( j, j ),KIND=sp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + a( j, j ) = cj*cj*real( a( j, j ),KIND=sp) + do i = j + 1, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqhe + + !> CLAQHP: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_claqhp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + ap( jc+j-1 ) = cj*cj*real( ap( jc+j-1 ),KIND=sp) + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + ap( jc ) = cj*cj*real( ap( jc ),KIND=sp) + do i = j + 1, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqhp + + !> Given a 2-by-2 or 3-by-3 matrix H, CLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - s1*I)*(H - s2*I) + !> scaling to avoid overflows and most underflows. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_claqr1( n, h, ldh, s1, s2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(sp), intent(in) :: s1, s2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + complex(sp), intent(in) :: h(ldh,*) + complex(sp), intent(out) :: v(*) + ! ================================================================ + ! Parameters + real(sp), parameter :: rzero = 0.0_sp + + + ! Local Scalars + complex(sp) :: cdum, h21s, h31s + real(sp) :: s + ! Intrinsic Functions + intrinsic :: abs,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) + if( s==rzero ) then + v( 1 ) = czero + v( 2 ) = czero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + end if + else + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) +cabs1( h( 3, 1 ) ) + if( s==czero ) then + v( 1 ) = czero + v( 2 ) = czero + v( 3 ) = czero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) +h( 1, 2 )*h21s + h( 1, 3 )& + *h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-s1-s2 ) + h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_claqr1 + + !> CLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_claqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqsb + + !> CLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_claqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqsp + + !> CLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_claqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_claqsy + + !> CLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_clar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(sp), intent(in) :: gaptol, lambda, pivmin + real(sp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(sp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(sp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs,real + ! Executable Statements + eps = stdlib_slamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus CLAR2V: applies a vector of complex plane rotations with real cosines + !> from both sides to a sequence of 2-by-2 complex Hermitian matrices, + !> defined by the elements of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := + !> ( conjg(z(i)) y(i) ) + !> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) + !> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_clar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(sp), intent(in) :: c(*) + complex(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(sp) :: ci, sii, sir, t1i, t1r, t5, t6, xi, yi, zii, zir + complex(sp) :: si, t2, t3, t4, zi + ! Intrinsic Functions + intrinsic :: aimag,cmplx,conjg,real + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = real( x( ix ),KIND=sp) + yi = real( y( ix ),KIND=sp) + zi = z( ix ) + zir = real( zi,KIND=sp) + zii = aimag( zi ) + ci = c( ic ) + si = s( ic ) + sir = real( si,KIND=sp) + sii = aimag( si ) + t1r = sir*zir - sii*zii + t1i = sir*zii + sii*zir + t2 = ci*zi + t3 = t2 - conjg( si )*xi + t4 = conjg( t2 ) + si*yi + t5 = ci*xi + t1r + t6 = ci*yi - t1r + x( ix ) = ci*t5 + ( sir*real( t4,KIND=sp)+sii*aimag( t4 ) ) + y( ix ) = ci*t6 - ( sir*real( t3,KIND=sp)-sii*aimag( t3 ) ) + z( ix ) = ci*t3 + conjg( si )*cmplx( t6, t1i,KIND=sp) + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_clar2v + + !> CLARCM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by M and real; B is M by N and complex; + !> C is M by N and complex. + + pure subroutine stdlib_clarcm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: b(ldb,*) + complex(sp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: aimag,cmplx,real + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( b( i, j ),KIND=sp) + end do + end do + l = m*n + 1 + call stdlib_sgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( b( i, j ) ) + end do + end do + call stdlib_sgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=sp),rwork( l+( j-1 )*m+i-1 ),KIND=sp) + + end do + end do + return + end subroutine stdlib_clarcm + + !> CLARF: applies a complex elementary reflector H to a complex M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + + pure subroutine stdlib_clarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + complex(sp), intent(in) :: tau + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: v(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=czero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-czero row in v. + do while( lastv>0 .and. v( i )==czero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-czero column in c(1:lastv,:). + lastc = stdlib_ilaclc(lastv, n, c, ldc) + else + ! scan for the last non-czero row in c(:,1:lastv). + lastc = stdlib_ilaclr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_sp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**h * v(1:lastv,1) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', lastv, lastc, cone,c, ldc, v, incv, & + czero, work, 1 ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**h + call stdlib_cgerc( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_cgemv( 'NO TRANSPOSE', lastc, lastv, cone, c, ldc,v, incv, czero, & + work, 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**h + call stdlib_cgerc( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_clarf + + !> CLARFB: applies a complex block reflector H or its transpose H**H to a + !> complex M-by-N matrix C, from either the left or the right. + + pure subroutine stdlib_clarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: t(ldt,*), v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_ccopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_clacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h *v2 + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c( k+1, 1 ), ldc,v( k+1, 1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ctrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c2 := c2 - v2 * w**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v( k+1, 1 ), ldv, work,ldwork, cone, c( k+1, 1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_ccopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c2 := c2 - w * v2**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v( k+1, 1 ),ldv, cone, c( 1, k+1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_ccopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_clacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( m-& + k+1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**h * v1 + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ctrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c1 := c1 - v1 * w**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v, ldv, work, ldwork,cone, c, ldc ) + end if + ! w := w * v2**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( m-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_ccopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( n-& + k+1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c, ldc, & + v, ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c1 := c1 - w * v1**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v, ldv, cone,c, ldc ) + end if + ! w := w * v2**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( n-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_ccopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_clacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h * v2**h + call stdlib_cgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone,c( k+1, 1 ), ldc, v( 1, k+1 ), ldv, cone,work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ctrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c2 := c2 - v2**h * w**h + call stdlib_cgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone,v( 1, k+1 ), ldv, work, ldwork, cone,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_ccopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c( 1, k+1 ), ldc,v( 1, k+1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ctrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v( 1, k+1 ), ldv, cone,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_ccopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_clacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( 1, m-k+1 ), ldv, work,ldwork ) + if( m>k ) then + ! w := w + c1**h * v1**h + call stdlib_cgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone, c,ldc, v, ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ctrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c1 := c1 - v1**h * w**h + call stdlib_cgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone, v,ldv, work, ldwork, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_ccopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( 1, n-k+1 ), ldv, work,ldwork ) + if( n>k ) then + ! w := w + c1 * v1**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ctrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v, ldv, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_clarfb + + !> CLARFB_GETT: applies a complex Householder block reflector H from the + !> left to a complex (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_clarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_ccopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_ctrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_cgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1, k+1 ), ldb, cone, & + work, ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_ctrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_cgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1, & + k+1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_ctrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_ccopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = czero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_ctrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_ctrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_ctrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_ctrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_clarfb_gett + + !> CLARFG: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, with beta real, and x is an + !> (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + !> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + pure subroutine stdlib_clarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(sp), intent(inout) :: alpha + complex(sp), intent(out) :: tau + ! Array Arguments + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(sp) :: alphi, alphr, beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,real,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_scnrm2( n-1, x, incx ) + alphr = real( alpha,KIND=sp) + alphi = aimag( alpha ) + if( xnorm==zero .and. alphi==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_slapy3( alphr, alphi, xnorm ), alphr ) + safmin = stdlib_slamch( 'S' ) / stdlib_slamch( 'E' ) + rsafmn = one / safmin + knt = 0 + if( abs( beta ) CLARFGP: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is real and non-negative, and + !> x is an (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + + subroutine stdlib_clarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(sp), intent(inout) :: alpha + complex(sp), intent(out) :: tau + ! Array Arguments + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(sp) :: alphi, alphr, beta, bignum, smlnum, xnorm + complex(sp) :: savealpha + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,real,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_scnrm2( n-1, x, incx ) + alphr = real( alpha,KIND=sp) + alphi = aimag( alpha ) + if( xnorm==zero ) then + ! h = [1-alpha/abs(alpha) 0; 0 i], sign chosen so alpha >= 0. + if( alphi==zero ) then + if( alphr>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = -alpha + end if + else + ! only "reflecting" the diagonal entry to be real and non-negative. + xnorm = stdlib_slapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=sp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = xnorm + end if + else + ! general case + beta = sign( stdlib_slapy3( alphr, alphi, xnorm ), alphr ) + smlnum = stdlib_slamch( 'S' ) / stdlib_slamch( 'E' ) + bignum = one / smlnum + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = real( -savealpha,KIND=sp) + end if + else + xnorm = stdlib_slapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=sp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = xnorm + end if + else + ! this is the general case. + call stdlib_cscal( n-1, alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_clarfgp + + !> CLARFT: forms the triangular factor T of a complex block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + + pure subroutine stdlib_clarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(sp), intent(out) :: t(ldt,*) + complex(sp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( prevlastv, i ) + if( tau( i )==czero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = czero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * conjg( v( i , j ) ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**h * v(i:j,i) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', j-i, i-1,-tau( i ), v( i+1, 1 ), & + ldv,v( i+1, i ), 1,cone, t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**h + call stdlib_cgemm( 'N', 'C', i-1, 1, j-i, -tau( i ),v( 1, i+1 ), ldv, v( i,& + i+1 ), ldv,cone, t( 1, i ), ldt ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_ctrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_clarft + + !> CLARFX: applies a complex elementary reflector H to a complex m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_clarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + complex(sp), intent(in) :: tau + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: v(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + complex(sp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, & + v6, v7, v8, v9 + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( tau==czero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_clarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + v10 = conjg( v( 10 ) ) + t10 = tau*conjg( v10 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_clarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + v10 = v( 10 ) + t10 = tau*conjg( v10 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 return + end subroutine stdlib_clarfx + + !> CLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n Hermitian matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_clarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + complex(sp), intent(in) :: tau + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: v(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: alpha + ! Executable Statements + if( tau==czero )return + ! form w:= c * v + call stdlib_chemv( uplo, n, cone, c, ldc, v, incv, czero, work, 1 ) + alpha = -chalf*tau*stdlib_cdotc( n, work, 1, v, incv ) + call stdlib_caxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_cher2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_clarfy + + !> CLARNV: returns a vector of n random complex numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_clarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + complex(sp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(sp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_sp + + + + ! Local Scalars + integer(ilp) :: i, il, iv + ! Local Arrays + real(sp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cmplx,exp,log,min,sqrt + ! Executable Statements + do 60 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + ! call stdlib_slaruv to generate 2*il realnumbers from a uniform (0,1,KIND=sp) + ! distribution (2*il <= lv) + call stdlib_slaruv( iseed, 2*il, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = cmplx( u( 2*i-1 ), u( 2*i ),KIND=sp) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = cmplx( two*u( 2*i-1 )-one,two*u( 2*i )-one,KIND=sp) + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*exp( cmplx( zero, twopi*u( 2*i ),& + KIND=sp) ) + end do + else if( idist==4 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit disk + do i = 1, il + x( iv+i-1 ) = sqrt( u( 2*i-1 ) )*exp( cmplx( zero, twopi*u( 2*i ),KIND=sp) ) + + end do + else if( idist==5 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit circle + do i = 1, il + x( iv+i-1 ) = exp( cmplx( zero, twopi*u( 2*i ),KIND=sp) ) + end do + end if + 60 continue + return + end subroutine stdlib_clarnv + + !> ! + !> + !> CLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -conjg(S) C ] [ G ] [ 0 ] + !> where C is real and C**2 + |S|**2 = 1. + !> The mathematical formulas used for C and S are + !> sgn(x) = { x / |x|, x != 0 + !> { 1, x = 0 + !> R = sgn(F) * sqrt(|F|**2 + |G|**2) + !> C = |F| / sqrt(|F|**2 + |G|**2) + !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) + !> When F and G are real, the formulas simplify to C = F/R and + !> S = G/R, and the returned values of C, S, and R should be + !> identical to those returned by CLARTG. + !> The algorithm used to compute these quantities incorporates scaling + !> to avoid overflow or underflow in computing the square root of the + !> sum of squares. + !> This is a faster version of the BLAS1 routine CROTG, except for + !> the following differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0, then C=0 and S is chosen so that R is real. + !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. + + pure subroutine stdlib_clartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(sp), intent(out) :: c + complex(sp), intent(in) :: f, g + complex(sp), intent(out) :: r, s + ! Local Scalars + real(sp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(sp) :: fs, gs, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(sp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=sp)**2 + aimag( t )**2 + ! Executable Statements + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=sp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=sp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=sp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + return + end subroutine stdlib_clartg + + !> CLARTV: applies a vector of complex plane rotations with real cosines + !> to elements of the complex vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) + + pure subroutine stdlib_clartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(sp), intent(in) :: c(*) + complex(sp), intent(in) :: s(*) + complex(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + complex(sp) :: xi, yi + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - conjg( s( ic ) )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_clartv + + !> CLARZ: applies a complex elementary reflector H to a complex + !> M-by-N matrix C, from either the left or the right. H is represented + !> in the form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + !> H is a product of k elementary reflectors as returned by CTZRZF. + + pure subroutine stdlib_clarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + complex(sp), intent(in) :: tau + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(in) :: v(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=czero ) then + ! w( 1:n ) = conjg( c( 1, 1:n ) ) + call stdlib_ccopy( n, c, ldc, work, 1 ) + call stdlib_clacgv( n, work, 1 ) + ! w( 1:n ) = conjg( w( 1:n ) + c( m-l+1:m, 1:n )**h * v( 1:l ) ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', l, n, cone, c( m-l+1, 1 ),ldc, v, incv,& + cone, work, 1 ) + call stdlib_clacgv( n, work, 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_caxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**h + call stdlib_cgeru( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=czero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_ccopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_cgemv( 'NO TRANSPOSE', m, l, cone, c( 1, n-l+1 ), ldc,v, incv, cone, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_caxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**h + call stdlib_cgerc( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_clarz + + !> CLARZB: applies a complex block reflector H or its transpose H**H + !> to a complex distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_clarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**h + do j = 1, k + call stdlib_ccopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**h * v( 1:k, 1:l )**t + if( l>0 )call stdlib_cgemm( 'TRANSPOSE', 'CONJUGATE TRANSPOSE', n, k, l,cone, c( m-& + l+1, 1 ), ldc, v, ldv, cone, work,ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_ctrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, cone, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**h + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**h * w( 1:n, 1:k )**h + if( l>0 )call stdlib_cgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -cone, v, ldv,work, & + ldwork, cone, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_ccopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**h + if( l>0 )call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, cone,c( 1, n-l+1 )& + , ldc, v, ldv, cone, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * conjg( t ) or + ! w( 1:m, 1:k ) * t**h + do j = 1, k + call stdlib_clacgv( k-j+1, t( j, j ), 1 ) + end do + call stdlib_ctrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, cone, t,ldt, work, & + ldwork ) + do j = 1, k + call stdlib_clacgv( k-j+1, t( j, j ), 1 ) + end do + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * conjg( v( 1:k, 1:l ) ) + do j = 1, l + call stdlib_clacgv( k, v( 1, j ), 1 ) + end do + if( l>0 )call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -cone,work, & + ldwork, v, ldv, cone, c( 1, n-l+1 ), ldc ) + do j = 1, l + call stdlib_clacgv( k, v( 1, j ), 1 ) + end do + end if + return + end subroutine stdlib_clarzb + + !> CLARZT: forms the triangular factor T of a complex block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_clarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(sp), intent(out) :: t(ldt,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i CLASCL: multiplies the M by N complex matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_clascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(sp), intent(in) :: cfrom, cto + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(sp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_sisnan(cfrom) ) then + info = -4 + else if( stdlib_sisnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower chalf of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper chalf of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_clascl + + !> CLASET: initializes a 2-D array A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_claset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + complex(sp), intent(in) :: alpha, beta + ! Array Arguments + complex(sp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the diagonal to beta and the strictly upper triangular + ! part of the array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the diagonal to beta and the strictly lower triangular + ! part of the array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else + ! set the array to beta on the diagonal and alpha on the + ! offdiagonal. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( m, n ) + a( i, i ) = beta + end do + end if + return + end subroutine stdlib_claset + + !> CLASR: applies a sequence of real plane rotations to a complex matrix + !> A, from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + + pure subroutine stdlib_clasr( side, pivot, direct, m, n, c, s, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, pivot, side + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(in) :: c(*), s(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + real(sp) :: ctemp, stemp + complex(sp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.( stdlib_lsame( side, 'L' ) .or. stdlib_lsame( side, 'R' ) ) ) then + info = 1 + else if( .not.( stdlib_lsame( pivot, 'V' ) .or. stdlib_lsame( pivot,'T' ) .or. & + stdlib_lsame( pivot, 'B' ) ) ) then + info = 2 + else if( .not.( stdlib_lsame( direct, 'F' ) .or. stdlib_lsame( direct, 'B' ) ) )& + then + info = 3 + else if( m<0 ) then + info = 4 + else if( n<0 ) then + info = 5 + else if( lda ! + !> + !> CLASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + + pure subroutine stdlib_classq( n, x, incx, scl, sumsq ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(inout) :: scl, sumsq + ! Array Arguments + complex(sp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(sp) :: abig, amed, asml, ax, ymax, ymin + ! quick return if possible + if( ieee_is_nan(scl) .or. ieee_is_nan(sumsq) ) return + if( sumsq == zero ) scl = one + if( scl == zero ) then + scl = one + sumsq = zero + end if + if (n <= 0) then + return + end if + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=sp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! put the existing sum of squares into one of the accumulators + if( sumsq > zero ) then + ax = scl*sqrt( sumsq ) + if (ax > tbig) then + ! we assume scl >= sqrt( tiny*eps ) / sbig + abig = abig + (scl*sbig)**2 * sumsq + else if (ax < tsml) then + ! we assume scl <= sqrt( huge ) / ssml + if (notbig) asml = asml + (scl*ssml)**2 * sumsq + else + amed = amed + scl**2 * sumsq + end if + end if + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range or zero + scl = one + sumsq = amed + end if + return + end subroutine stdlib_classq + + !> CLASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + + pure subroutine stdlib_claswp( n, a, lda, k1, k2, ipiv, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k1, k2, lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32 + complex(sp) :: temp + ! Executable Statements + ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows + ! k1 through k2. + if( incx>0 ) then + ix0 = k1 + i1 = k1 + i2 = k2 + inc = 1 + else if( incx<0 ) then + ix0 = k1 + ( k1-k2 )*incx + i1 = k2 + i2 = k1 + inc = -1 + else + return + end if + n32 = ( n / 32 )*32 + if( n32/=0 ) then + do j = 1, n32, 32 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = j, j + 31 + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end do + end if + if( n32/=n ) then + n32 = n32 + 1 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = n32, n + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end if + return + end subroutine stdlib_claswp + + !> CLASYF: computes a partial factorization of a complex symmetric matrix + !> A using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**T denotes the transpose of U. + !> CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_clasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(sp) :: absakk, alpha, colmax, rowmax + complex(sp) :: d11, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column kw-1 of w and update it + call stdlib_ccopy( imax, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + call stdlib_ccopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + if( k1 ) then + jmax = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, kw-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + if( kp>1 )call stdlib_ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k2 ) then + ! compose the columns of the inverse of 2-by-2 pivot + ! block d in the following way to reduce the number + ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by + ! this inverse + ! d**(-1) = ( d11 d21 )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = + ! ( (-d21 ) ( d11 ) ) + ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * + ! * ( ( d22/d21 ) ( -1 ) ) = + ! ( ( -1 ) ( d11/d21 ) ) + ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) ( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = 1/d21 * t * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + ! = d21 * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / d21 + d22 = w( k-1, kw-1 ) / d21 + t = cone / ( d11*d22-cone ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + d21 = t / d21 + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = d21*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_cswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + ! copy column k of a to column k of w and update it + call stdlib_ccopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ), lda,w( k, 1 ), ldw,& + cone, w( k, k ), 1 ) + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column k+1 of w and update it + call stdlib_ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_ccopy( n-imax+1, a( imax, imax ), 1, w( imax, k+1 ),1 ) + call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( imax, & + 1 ), ldw, cone, w( k, k+1 ),1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_icamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, k+1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + if( kp1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + call stdlib_ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_cswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_clasyf + + !> CLASYF_RK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> CLASYF_RK is an auxiliary routine called by CSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_clasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*), w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, rowmax, sfmin, stemp + complex(sp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = cabs1( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_cscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ),ldw, cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_ccopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_icamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_cscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k CLASYF_ROOK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> CLASYF_ROOK is an auxiliary routine called by CSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_clasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, & + ii + real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin + complex(sp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_icamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = cabs1( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_cscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_cgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n + j = k + 1 + 60 continue + kstep = 1 + jp1 = 1 + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_cswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = j - 1 + if( jp1/=jj .and. kstep==2 )call stdlib_cswap( n-j+1, a( jp1, j ), lda, a( jj, j & + ), lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_ccopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_cgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_icamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_cscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k=1 )call stdlib_cswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = j + 1 + if( jp1/=jj .and. kstep==2 )call stdlib_cswap( j, a( jp1, 1 ), lda, a( jj, 1 ), & + lda ) + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_clasyf_rook + + !> CLATBS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine CTBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_clatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind + real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(sp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real + ! Statement Functions + real(sp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=sp) / 2. ) +abs( aimag( zdum ) / 2. ) + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( kd<0 ) then + info = -6 + else if( ldab0 ) then + cnorm( j ) = stdlib_scasum( jlen, ab( 2, j ), 1 ) + else + cnorm( j ) = zero + end if + end do + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_isamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_sscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_ctbsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + maind = kd + 1 + else + jfirst = 1 + jlast = n + jinc = 1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + maind = kd + 1 + else + jfirst = n + jlast = 1 + jinc = -1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ctbsv( uplo, trans, diag, n, kd, ab, ldab, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_csscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 105 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 105 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_csscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - + ! x(j)* a(max(1,j-kd):j-1,j) + jlen = min( kd, j-1 ) + call stdlib_caxpy( jlen, -x( j )*tscal,ab( kd+1-jlen, j ), 1, x( j-jlen & + ), 1 ) + i = stdlib_icamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else if( j0 )call stdlib_caxpy( jlen, -x( j )*tscal, ab( 2, j ), 1,x( j+1 ),& + 1 ) + i = j + stdlib_icamax( n-j, x( j+1 ), 1 ) + xmax = cabs1( x( i ) ) + end if + end do loop_110 + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b + loop_150: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_cdotu( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( ab( kd+i-jlen, j )*uscal )*x( j-jlen-1+i ) + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( ab( i+1, j )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=sp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 145 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 145 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_150 + else + ! solve a**h * x = b + loop_190: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_cdotc( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( kd+i-jlen, j ) )*uscal )*x( j-jlen-1+i ) + + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( i+1, j ) )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=sp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + if( tscal==one )go to 185 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 185 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_190 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_clatbs + + !> CLATPS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, A**H denotes the conjugate transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_clatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen + real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(sp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real + ! Statement Functions + real(sp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=sp) / 2. ) +abs( aimag( zdum ) / 2. ) + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLATPS', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine machine dependent parameters to control overflow. + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + smlnum = smlnum / stdlib_slamch( 'PRECISION' ) + bignum = one / smlnum + scale = one + if( stdlib_lsame( normin, 'N' ) ) then + ! compute the 1-norm of each column, not including the diagonal. + if( upper ) then + ! a is upper triangular. + ip = 1 + do j = 1, n + cnorm( j ) = stdlib_scasum( j-1, ap( ip ), 1 ) + ip = ip + j + end do + else + ! a is lower triangular. + ip = 1 + do j = 1, n - 1 + cnorm( j ) = stdlib_scasum( n-j, ap( ip+1 ), 1 ) + ip = ip + n - j + 1 + end do + cnorm( n ) = zero + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_isamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_sscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_ctpsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + else + jfirst = 1 + jlast = n + jinc = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = n + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + ip = ip + jinc*jlen + jlen = jlen - 1 + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + jlen = jlen + 1 + ip = ip + jinc*jlen + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ctpsv( uplo, trans, diag, n, ap, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_csscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + ip = jfirst*( jfirst+1 ) / 2 + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + if( tscal==one )go to 105 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 105 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_csscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_caxpy( j-1, -x( j )*tscal, ap( ip-j+1 ), 1, x,1 ) + i = stdlib_icamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + ip = ip - j + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 145 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_150 + else + ! solve a**h * x = b + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + loop_190: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ap( ip ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 185 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_190 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_clatps + + !> CLATRD: reduces NB rows and columns of a complex Hermitian matrix A to + !> Hermitian tridiagonal form by a unitary similarity + !> transformation Q**H * A * Q, and returns the matrices V and W which are + !> needed to apply the transformation to the unreduced part of A. + !> If UPLO = 'U', CLATRD reduces the last NB rows and columns of a + !> matrix, of which the upper triangle is supplied; + !> if UPLO = 'L', CLATRD reduces the first NB rows and columns of a + !> matrix, of which the lower triangle is supplied. + !> This is an auxiliary routine called by CHETRD. + + pure subroutine stdlib_clatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + real(sp), intent(out) :: e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), w(ldw,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iw + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: min,real + ! Executable Statements + ! quick return if possible + if( n<=0 )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! reduce last nb columns of upper triangle + loop_10: do i = n, n - nb + 1, -1 + iw = i - n + nb + if( i1 ) then + ! generate elementary reflector h(i) to annihilate + ! a(1:i-2,i) + alpha = a( i-1, i ) + call stdlib_clarfg( i-1, alpha, a( 1, i ), 1, tau( i-1 ) ) + e( i-1 ) = real( alpha,KIND=sp) + a( i-1, i ) = cone + ! compute w(1:i-1,i) + call stdlib_chemv( 'UPPER', i-1, cone, a, lda, a( 1, i ), 1,czero, w( 1, iw ),& + 1 ) + if( i CLATRS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, A**H denotes the + !> conjugate transpose of A, x and b are n-element vectors, and s is a + !> scaling factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_clatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(inout) :: cnorm(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast + real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(sp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real + ! Statement Functions + real(sp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=sp) / 2. ) +abs( aimag( zdum ) / 2. ) + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = a( j, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ctrsv( uplo, trans, diag, n, a, lda, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_csscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 105 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 105 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_csscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_caxpy( j-1, -x( j )*tscal, a( 1, j ), 1, x,1 ) + i = stdlib_icamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 145 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_150 + else + ! solve a**h * x = b + loop_190: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( a( j, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_cladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_csscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_cladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 185 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_cladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_190 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_clatrs + + !> CLATRZ: factors the M-by-(M+L) complex upper trapezoidal matrix + !> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means + !> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary + !> matrix and, R and A1 are M-by-M upper triangular matrices. + + pure subroutine stdlib_clatrz( m, n, l, a, lda, tau, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: l, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m==0 ) then + return + else if( m==n ) then + do i = 1, n + tau( i ) = czero + end do + return + end if + do i = m, 1, -1 + ! generate elementary reflector h(i) to annihilate + ! [ a(i,i) a(i,n-l+1:n) ] + call stdlib_clacgv( l, a( i, n-l+1 ), lda ) + alpha = conjg( a( i, i ) ) + call stdlib_clarfg( l+1, alpha, a( i, n-l+1 ), lda, tau( i ) ) + tau( i ) = conjg( tau( i ) ) + ! apply h(i) to a(1:i-1,i:n) from the right + call stdlib_clarz( 'RIGHT', i-1, n-i+1, l, a( i, n-l+1 ), lda,conjg( tau( i ) ), a( & + 1, i ), lda, work ) + a( i, i ) = conjg( alpha ) + end do + return + end subroutine stdlib_clatrz + + !> CLAUNHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> CLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine CLAUNHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, CLAUNHR_COL_GETRFNP2 + !> is self-sufficient and can be used without CLAUNHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_claunhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*) + ! ===================================================================== + + + ! Local Scalars + real(sp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + complex(sp) :: z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_cscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_claunhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_ctrsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_claunhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_claunhr_col_getrfnp2 + + !> CLAUU2: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the unblocked form of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_clauu2( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: cmplx,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CLAUUM: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_clauum( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ib, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_clauu2( uplo, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute the product u * u**h. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', i-1, & + ib, cone, a( i, i ), lda,a( 1, i ), lda ) + call stdlib_clauu2( 'UPPER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',i-1, ib, n-i-ib+1,& + cone, a( 1, i+ib ),lda, a( i, i+ib ), lda, cone, a( 1, i ),lda ) + call stdlib_cherk( 'UPPER', 'NO TRANSPOSE', ib, n-i-ib+1,one, a( i, i+ib ),& + lda, one, a( i, i ),lda ) + end if + end do + else + ! compute the product l**h * l. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_ctrmm( 'LEFT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', ib, i-1,& + cone, a( i, i ), lda,a( i, 1 ), lda ) + call stdlib_clauu2( 'LOWER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', ib,i-1, n-i-ib+1,& + cone, a( i+ib, i ), lda,a( i+ib, 1 ), lda, cone, a( i, 1 ), lda ) + call stdlib_cherk( 'LOWER', 'CONJUGATE TRANSPOSE', ib,n-i-ib+1, one, a( i+& + ib, i ), lda, one,a( i, i ), lda ) + end if + end do + end if + end if + return + end subroutine stdlib_clauum + + !> CPBEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_cpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(out) :: s(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab CPBSTF: computes a split Cholesky factorization of a complex + !> Hermitian positive definite band matrix A. + !> This routine is designed to be used in conjunction with CHBGST. + !> The factorization has the form A = S**H*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + + pure subroutine stdlib_cpbstf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, km, m + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_csscal( km, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_clacgv( km, ab( kd, j+1 ), kld ) + call stdlib_cher( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_clacgv( km, ab( kd, j+1 ), kld ) + end if + end do + else + ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m). + do j = n, m + 1, -1 + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=sp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( j-1, kd ) + ! compute elements j-km:j-1 of the j-th row and update the + ! trailing submatrix within the band. + call stdlib_csscal( km, one / ajj, ab( km+1, j-km ), kld ) + call stdlib_clacgv( km, ab( km+1, j-km ), kld ) + call stdlib_cher( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1, j-km ), kld ) + + call stdlib_clacgv( km, ab( km+1, j-km ), kld ) + end do + ! factorize the updated submatrix a(1:m,1:m) as u**h*u. + do j = 1, m + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=sp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( kd, m-j ) + ! compute elements j+1:j+km of the j-th column and update the + ! trailing submatrix within the band. + if( km>0 ) then + call stdlib_csscal( km, one / ajj, ab( 2, j ), 1 ) + call stdlib_cher( 'LOWER', km, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 50 continue + info = j + return + end subroutine stdlib_cpbstf + + !> CPBTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix, U**H is the conjugate transpose + !> of U, and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_cpbtf2( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, kn + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_csscal( kn, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_clacgv( kn, ab( kd, j+1 ), kld ) + call stdlib_cher( 'UPPER', kn, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_clacgv( kn, ab( kd, j+1 ), kld ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=sp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + ! compute elements j+1:j+kn of column j and update the + ! trailing submatrix within the band. + kn = min( kd, n-j ) + if( kn>0 ) then + call stdlib_csscal( kn, one / ajj, ab( 2, j ), 1 ) + call stdlib_cher( 'LOWER', kn, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 30 continue + info = j + return + end subroutine stdlib_cpbtf2 + + !> CPBTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite band matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPBTRF. + + pure subroutine stdlib_cpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab CPOEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_cpoequ( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(out) :: s(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( lda CPOEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from CPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_cpoequb( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: smin, base, tmp + ! Intrinsic Functions + intrinsic :: max,min,sqrt,log,int + ! Executable Statements + ! test the input parameters. + ! positive definite only performs 1 pass of equilibration. + info = 0 + if( n<0 ) then + info = -1 + else if( lda CPOTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_cpotf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CPOTRF2: computes the Cholesky factorization of a Hermitian + !> positive definite matrix A using the recursive algorithm. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = n/2 + !> [ A21 | A22 ] n2 = n-n1 + !> The subroutine calls itself to factor A11. Update and scale A21 + !> or A12, update A22 then calls itself to factor A22. + + pure recursive subroutine stdlib_cpotrf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: n1, n2, iinfo + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,real,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CPOTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPOTRF. + + pure subroutine stdlib_cpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CPPEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_cppequ( uplo, n, ap, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(out) :: s(*) + complex(sp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, jj + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CPPEQU', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + scond = one + amax = zero + return + end if + ! initialize smin and amax. + s( 1 ) = real( ap( 1 ),KIND=sp) + smin = s( 1 ) + amax = s( 1 ) + if( upper ) then + ! uplo = 'u': upper triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + i + s( i ) = real( ap( jj ),KIND=sp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + else + ! uplo = 'l': lower triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + n - i + 2 + s( i ) = real( ap( jj ),KIND=sp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + end if + if( smin<=zero ) then + ! find the first non-positive diagonal element and return. + do i = 1, n + if( s( i )<=zero ) then + info = i + return + end if + end do + else + ! set the scale factors to the reciprocals + ! of the diagonal elements. + do i = 1, n + s( i ) = one / sqrt( s( i ) ) + end do + ! compute scond = min(s(i)) / max(s(i)) + scond = sqrt( smin ) / sqrt( amax ) + end if + return + end subroutine stdlib_cppequ + + !> CPPTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_cpptrf( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CPPTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( upper ) then + ! compute the cholesky factorization a = u**h * u. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + ! compute elements 1:j-1 of column j. + if( j>1 )call stdlib_ctpsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',j-1, ap, & + ap( jc ), 1 ) + ! compute u(j,j) and test for non-positive-definiteness. + ajj = real( real( ap( jj ),KIND=sp) - stdlib_cdotc( j-1,ap( jc ), 1, ap( jc ), 1 & + ),KIND=sp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ap( jj ) = sqrt( ajj ) + end do + else + ! compute the cholesky factorization a = l * l**h. + jj = 1 + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ap( jj ),KIND=sp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ap( jj ) = ajj + ! compute elements j+1:n of column j and update the trailing + ! submatrix. + if( j CPPTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**H*U or A = L*L**H computed by CPPTRF. + + pure subroutine stdlib_cpptrs( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb CPSTF2: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 2 BLAS. + + pure subroutine stdlib_cpstf2( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: ctemp + real(sp) :: ajj, sstop, stemp + integer(ilp) :: i, itemp, j, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: conjg,max,real,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),KIND=sp) + + end if + work( n+i ) = real( a( i, i ),KIND=sp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=sstop.or.stdlib_sisnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_cswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvt1 ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),KIND=sp) + + end if + work( n+i ) = real( a( i, i ),KIND=sp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=sstop.or.stdlib_sisnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_cswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt CPSTRF: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 3 BLAS. + + pure subroutine stdlib_cpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: ctemp + real(sp) :: ajj, sstop, stemp + integer(ilp) :: i, itemp, j, jb, k, nb, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: conjg,max,min,real,sqrt,maxloc + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_cpstf2( uplo, n, a( 1, 1 ), lda, piv, rank, tol, work,info ) + go to 230 + else + ! initialize piv + do i = 1, n + piv( i ) = i + end do + ! compute stopping value + do i = 1, n + work( i ) = real( a( i, i ),KIND=sp) + end do + pvt = maxloc( work( 1:n ), 1 ) + ajj = real( a( pvt, pvt ),KIND=sp) + if( ajj<=zero.or.stdlib_sisnan( ajj ) ) then + rank = 0 + info = 1 + go to 230 + end if + ! compute stopping value if not supplied + if( tolk ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),& + KIND=sp) + end if + work( n+i ) = real( a( i, i ),KIND=sp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=sstop.or.stdlib_sisnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_cswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvtk ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),& + KIND=sp) + end if + work( n+i ) = real( a( i, i ),KIND=sp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=sstop.or.stdlib_sisnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_cswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt CPTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite tridiagonal matrix + !> using the factorization A = L*D*L**H or A = U**H*D*U computed by + !> CPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cptcon( n, d, e, anorm, rcond, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(in) :: d(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ix + real(sp) :: ainvnm + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input arguments. + info = 0 + if( n<0 ) then + info = -1 + else if( anorm CPTTRF: computes the L*D*L**H factorization of a complex Hermitian + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**H *D*U. + + pure subroutine stdlib_cpttrf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: d(*) + complex(sp), intent(inout) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i4 + real(sp) :: eii, eir, f, g + ! Intrinsic Functions + intrinsic :: aimag,cmplx,mod,real + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'CPTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! compute the l*d*l**h (or u**h *d*u) factorization of a. + i4 = mod( n-1, 4 ) + do i = 1, i4 + if( d( i )<=zero ) then + info = i + go to 20 + end if + eir = real( e( i ),KIND=sp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=sp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + end do + loop_110: do i = i4+1, n - 4, 4 + ! drop out of the loop if d(i) <= 0: the matrix is not positive + ! definite. + if( d( i )<=zero ) then + info = i + go to 20 + end if + ! solve for e(i) and d(i+1). + eir = real( e( i ),KIND=sp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=sp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + if( d( i+1 )<=zero ) then + info = i+1 + go to 20 + end if + ! solve for e(i+1) and d(i+2). + eir = real( e( i+1 ),KIND=sp) + eii = aimag( e( i+1 ) ) + f = eir / d( i+1 ) + g = eii / d( i+1 ) + e( i+1 ) = cmplx( f, g,KIND=sp) + d( i+2 ) = d( i+2 ) - f*eir - g*eii + if( d( i+2 )<=zero ) then + info = i+2 + go to 20 + end if + ! solve for e(i+2) and d(i+3). + eir = real( e( i+2 ),KIND=sp) + eii = aimag( e( i+2 ) ) + f = eir / d( i+2 ) + g = eii / d( i+2 ) + e( i+2 ) = cmplx( f, g,KIND=sp) + d( i+3 ) = d( i+3 ) - f*eir - g*eii + if( d( i+3 )<=zero ) then + info = i+3 + go to 20 + end if + ! solve for e(i+3) and d(i+4). + eir = real( e( i+3 ),KIND=sp) + eii = aimag( e( i+3 ) ) + f = eir / d( i+3 ) + g = eii / d( i+3 ) + e( i+3 ) = cmplx( f, g,KIND=sp) + d( i+4 ) = d( i+4 ) - f*eir - g*eii + end do loop_110 + ! check d(n) for positive definiteness. + if( d( n )<=zero )info = n + 20 continue + return + end subroutine stdlib_cpttrf + + !> CPTTS2: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_cptts2( iuplo, n, nrhs, d, e, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: iuplo, ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: d(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + if( n==1 )call stdlib_csscal( nrhs, 1. / d( 1 ), b, ldb ) + return + end if + if( iuplo==1 ) then + ! solve a * x = b using the factorization a = u**h *d*u, + ! overwriting each right hand side vector with its solution. + if( nrhs<=2 ) then + j = 1 + 5 continue + ! solve u**h * x = b. + do i = 2, n + b( i, j ) = b( i, j ) - b( i-1, j )*conjg( e( i-1 ) ) + end do + ! solve d * u * x = b. + do i = 1, n + b( i, j ) = b( i, j ) / d( i ) + end do + do i = n - 1, 1, -1 + b( i, j ) = b( i, j ) - b( i+1, j )*e( i ) + end do + if( j CROT: applies a plane rotation, where the cos (C) is real and the + !> sin (S) is complex, and the vectors CX and CY are complex. + + pure subroutine stdlib_crot( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(sp), intent(in) :: c + complex(sp), intent(in) :: s + ! Array Arguments + complex(sp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(sp) :: stemp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + stemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - conjg( s )*cx( ix ) + cx( ix ) = stemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + stemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - conjg( s )*cx( i ) + cx( i ) = stemp + end do + return + end subroutine stdlib_crot + + !> CSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_cspmv( uplo, n, alpha, ap, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, n + complex(sp), intent(in) :: alpha, beta + ! Array Arguments + complex(sp), intent(in) :: ap(*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + complex(sp) :: temp1, temp2 + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 6 + else if( incy==0 ) then + info = 9 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CSPMV ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( ( alpha==czero ) .and. ( beta==cone ) ) )return + ! set up the start points in x and y. + if( incx>0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when ap contains the upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + k = kk + do i = 1, j - 1 + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + temp1*ap( kk+j-1 ) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do k = kk, kk + j - 2 + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*ap( kk+j-1 ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*ap( kk ) + k = kk + 1 + do i = j + 1, n + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + alpha*temp2 + kk = kk + ( n-j+1 ) + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*ap( kk ) + ix = jx + iy = jy + do k = kk + 1, kk + n - j + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + ( n-j+1 ) + end do + end if + end if + return + end subroutine stdlib_cspmv + + !> CSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_cspr( uplo, n, alpha, x, incx, ap ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, n + complex(sp), intent(in) :: alpha + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + complex(sp) :: temp + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CSPR ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( alpha==czero ) )return + ! set the start point in x if the increment is not unity. + if( incx<=0 ) then + kx = 1 - ( n-1 )*incx + else if( incx/=1 ) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form a when upper triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + k = kk + do i = 1, j - 1 + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( j )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + kk = kk + j + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ix = kx + do k = kk, kk + j - 2 + ap( k ) = ap( k ) + x( ix )*temp + ix = ix + incx + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( jx )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + ap( kk ) = ap( kk ) + temp*x( j ) + k = kk + 1 + do i = j + 1, n + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + else + ap( kk ) = ap( kk ) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ap( kk ) = ap( kk ) + temp*x( jx ) + ix = jx + do k = kk + 1, kk + n - j + ix = ix + incx + ap( k ) = ap( k ) + x( ix )*temp + end do + else + ap( kk ) = ap( kk ) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_cspr + + !> CSPTRF: computes the factorization of a complex symmetric matrix A + !> stored in packed format using the Bunch-Kaufman diagonal pivoting + !> method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_csptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(sp) :: absakk, alpha, colmax, rowmax + complex(sp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CSPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**t using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc+k-1 ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_icamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_icamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc+imax-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_cswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = ap( knc+j-1 ) + ap( knc+j-1 ) = ap( kx ) + ap( kx ) = t + end do + t = ap( knc+kk-1 ) + ap( knc+kk-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = t + if( kstep==2 ) then + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / ap( kc+k-1 ) + call stdlib_cspr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_cscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = ap( k-1+( k-1 )*k / 2 ) + d22 = ap( k-1+( k-2 )*( k-1 ) / 2 ) / d12 + d11 = ap( k+( k-1 )*k / 2 ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-ap( j+( k-1 )*k / 2 ) ) + + wk = d12*( d22*ap( j+( k-1 )*k / 2 )-ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *wk -ap( i+( k-2 )*( k-1 ) / 2 )*wkm1 + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp CSPTRI: computes the inverse of a complex symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSPTRF. + + pure subroutine stdlib_csptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + complex(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CSPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = cone / ap( kc+k-1 ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_cspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_cdotu( k-1, work, 1, ap( kc ), 1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = ap( kcnext+k-1 ) + ak = ap( kc+k-1 ) / t + akp1 = ap( kcnext+k ) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-cone ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_cspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_cdotu( k-1, work, 1, ap( kc ), 1 ) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_cdotu( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_ccopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_cspmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -stdlib_cdotu( k-1, work, 1, ap( kcnext ), 1 ) + + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_cswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = ap( kc+j-1 ) + ap( kc+j-1 ) = ap( kx ) + ap( kx ) = temp + end do + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = cone / ap( kc ) + ! compute column k of the inverse. + if( k CSPTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSPTRF. + + pure subroutine stdlib_csptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_cscal( nrhs, cone / ap( kc+k-1 ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_cgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_cgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,ap( kc+k ), 1, cone, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CSRSCL: multiplies an n-element complex vector x by the real scalar + !> 1/a. This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + + pure subroutine stdlib_csrscl( n, sa, sx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(in) :: sa + ! Array Arguments + complex(sp), intent(inout) :: sx(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + real(sp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 )return + ! get machine parameters + smlnum = stdlib_slamch( 'S' ) + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! initialize the denominator to sa and the numerator to 1. + cden = sa + cnum = one + 10 continue + cden1 = cden*smlnum + cnum1 = cnum / bignum + if( abs( cden1 )>abs( cnum ) .and. cnum/=zero ) then + ! pre-multiply x by smlnum if cden is large compared to cnum. + mul = smlnum + done = .false. + cden = cden1 + else if( abs( cnum1 )>abs( cden ) ) then + ! pre-multiply x by bignum if cden is small compared to cnum. + mul = bignum + done = .false. + cnum = cnum1 + else + ! multiply x by cnum / cden and return. + mul = cnum / cden + done = .true. + end if + ! scale the vector x by mul + call stdlib_csscal( n, mul, sx, incx ) + if( .not.done )go to 10 + return + end subroutine stdlib_csrscl + + !> CSTEIN: computes the eigenvectors of a real symmetric tridiagonal + !> matrix T corresponding to specified eigenvalues, using inverse + !> iteration. + !> The maximum number of iterations allowed for each eigenvector is + !> specified by an internal parameter MAXITS (currently set to 5). + !> Although the eigenvectors are real, they are stored in a complex + !> array, which may be passed to CUNMTR or CUPMTR for back + !> transformation to the eigenvectors of a complex Hermitian matrix + !> which was reduced to tridiagonal form. + + pure subroutine stdlib_cstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, m, n + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), isplit(*) + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(in) :: d(*), e(*), w(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: odm3 = 1.0e-3_sp + real(sp), parameter :: odm1 = 1.0e-1_sp + integer(ilp), parameter :: maxits = 5 + integer(ilp), parameter :: extra = 2 + + + + ! Local Scalars + integer(ilp) :: b1, blksiz, bn, gpind, i, iinfo, indrv1, indrv2, indrv3, indrv4, & + indrv5, its, j, j1, jblk, jmax, jr, nblk, nrmchk + real(sp) :: ctr, eps, eps1, nrm, onenrm, ortol, pertol, scl, sep, stpcrt, tol, xj, & + xjm + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,cmplx,max,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + do i = 1, m + ifail( i ) = 0 + end do + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -4 + else if( ldz1 ) then + eps1 = abs( eps*xj ) + pertol = ten*eps1 + sep = xj - xjm + if( sepmaxits )go to 120 + ! normalize and scale the righthand side vector pb. + jmax = stdlib_isamax( blksiz, work( indrv1+1 ), 1 ) + scl = blksiz*onenrm*max( eps,abs( work( indrv4+blksiz ) ) ) /abs( work( indrv1+& + jmax ) ) + call stdlib_sscal( blksiz, scl, work( indrv1+1 ), 1 ) + ! solve the system lu = pb. + call stdlib_slagts( -1, blksiz, work( indrv4+1 ), work( indrv2+2 ),work( indrv3+& + 1 ), work( indrv5+1 ), iwork,work( indrv1+1 ), tol, iinfo ) + ! reorthogonalize by modified gram-schmidt if eigenvalues are + ! close enough. + if( jblk==1 )go to 110 + if( abs( xj-xjm )>ortol )gpind = j + if( gpind/=j ) then + do i = gpind, j - 1 + ctr = zero + do jr = 1, blksiz + ctr = ctr + work( indrv1+jr )*real( z( b1-1+jr, i ),KIND=sp) + end do + do jr = 1, blksiz + work( indrv1+jr ) = work( indrv1+jr ) -ctr*real( z( b1-1+jr, i ),& + KIND=sp) + end do + end do + end if + ! check the infinity norm of the iterate. + 110 continue + jmax = stdlib_isamax( blksiz, work( indrv1+1 ), 1 ) + nrm = abs( work( indrv1+jmax ) ) + ! continue for additional iterations after norm reaches + ! stopping criterion. + if( nrm CSTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the implicit QL or QR method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + !> matrix to tridiagonal form. + + pure subroutine stdlib_csteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + + ! Local Scalars + integer(ilp) :: i, icompz, ii, iscale, j, jtot, k, l, l1, lend, lendm1, lendp1, lendsv,& + lm1, lsv, m, mm, mm1, nm1, nmaxit + real(sp) :: anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, s, safmax, safmin, ssfmax, & + ssfmin, tst + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldzn )go to 160 + if( l1>1 )e( l1-1 ) = zero + if( l1<=nm1 ) then + do m = l1, nm1 + tst = abs( e( m ) ) + if( tst==zero )go to 30 + if( tst<=( sqrt( abs( d( m ) ) )*sqrt( abs( d( m+1 ) ) ) )*eps ) then + e( m ) = zero + go to 30 + end if + end do + end if + m = n + 30 continue + l = l1 + lsv = l + lend = m + lendsv = lend + l1 = m + 1 + if( lend==l )go to 10 + ! scale submatrix in rows and columns l to lend + anorm = stdlib_slanst( 'I', lend-l+1, d( l ), e( l ) ) + iscale = 0 + if( anorm==zero )go to 10 + if( anorm>ssfmax ) then + iscale = 1 + call stdlib_slascl( 'G', 0, 0, anorm, ssfmax, lend-l+1, 1, d( l ), n,info ) + call stdlib_slascl( 'G', 0, 0, anorm, ssfmax, lend-l, 1, e( l ), n,info ) + else if( anorml ) then + ! ql iteration + ! look for small subdiagonal element. + 40 continue + if( l/=lend ) then + lendm1 = lend - 1 + do m = l, lendm1 + tst = abs( e( m ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m+1 ) )+safmin )go to 60 + end do + end if + m = lend + 60 continue + if( m0 ) then + call stdlib_slaev2( d( l ), e( l ), d( l+1 ), rt1, rt2, c, s ) + work( l ) = c + work( n-1+l ) = s + call stdlib_clasr( 'R', 'V', 'B', n, 2, work( l ),work( n-1+l ), z( 1, l ), & + ldz ) + else + call stdlib_slae2( d( l ), e( l ), d( l+1 ), rt1, rt2 ) + end if + d( l ) = rt1 + d( l+1 ) = rt2 + e( l ) = zero + l = l + 2 + if( l<=lend )go to 40 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l+1 )-p ) / ( two*e( l ) ) + r = stdlib_slapy2( g, one ) + g = d( m ) - p + ( e( l ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + mm1 = m - 1 + do i = mm1, l, -1 + f = s*e( i ) + b = c*e( i ) + call stdlib_slartg( g, f, c, s, r ) + if( i/=m-1 )e( i+1 ) = r + g = d( i+1 ) - p + r = ( d( i )-g )*s + two*c*b + p = s*r + d( i+1 ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = -s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = m - l + 1 + call stdlib_clasr( 'R', 'V', 'B', n, mm, work( l ), work( n-1+l ),z( 1, l ), ldz & + ) + end if + d( l ) = d( l ) - p + e( l ) = g + go to 40 + ! eigenvalue found. + 80 continue + d( l ) = p + l = l + 1 + if( l<=lend )go to 40 + go to 140 + else + ! qr iteration + ! look for small superdiagonal element. + 90 continue + if( l/=lend ) then + lendp1 = lend + 1 + do m = l, lendp1, -1 + tst = abs( e( m-1 ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m-1 ) )+safmin )go to 110 + end do + end if + m = lend + 110 continue + if( m>lend )e( m-1 ) = zero + p = d( l ) + if( m==l )go to 130 + ! if remaining matrix is 2-by-2, use stdlib_slae2 or stdlib_slaev2 + ! to compute its eigensystem. + if( m==l-1 ) then + if( icompz>0 ) then + call stdlib_slaev2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2, c, s ) + work( m ) = c + work( n-1+m ) = s + call stdlib_clasr( 'R', 'V', 'F', n, 2, work( m ),work( n-1+m ), z( 1, l-1 ), & + ldz ) + else + call stdlib_slae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) + end if + d( l-1 ) = rt1 + d( l ) = rt2 + e( l-1 ) = zero + l = l - 2 + if( l>=lend )go to 90 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l-1 )-p ) / ( two*e( l-1 ) ) + r = stdlib_slapy2( g, one ) + g = d( m ) - p + ( e( l-1 ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + lm1 = l - 1 + do i = m, lm1 + f = s*e( i ) + b = c*e( i ) + call stdlib_slartg( g, f, c, s, r ) + if( i/=m )e( i-1 ) = r + g = d( i ) - p + r = ( d( i+1 )-g )*s + two*c*b + p = s*r + d( i ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = l - m + 1 + call stdlib_clasr( 'R', 'V', 'F', n, mm, work( m ), work( n-1+m ),z( 1, m ), ldz & + ) + end if + d( l ) = d( l ) - p + e( lm1 ) = g + go to 90 + ! eigenvalue found. + 130 continue + d( l ) = p + l = l - 1 + if( l>=lend )go to 90 + go to 140 + end if + ! undo scaling if necessary + 140 continue + if( iscale==1 ) then + call stdlib_slascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_slascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + else if( iscale==2 ) then + call stdlib_slascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_slascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + end if + ! check for no convergence to an eigenvalue after a total + ! of n*maxit iterations. + if( jtot==nmaxit ) then + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + return + end if + go to 10 + ! order eigenvalues and eigenvectors. + 160 continue + if( icompz==0 ) then + ! use quick sort + call stdlib_slasrt( 'I', n, d, info ) + else + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

CSYCONV: convert A given by TRF into L and D and vice-versa. + !> Get Non-diag elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_csyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + complex(sp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=czero + a(i-1,i)=czero + i=i-1 + else + e(i)=czero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=czero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_csyconv + + !> If parameter WAY = 'C': + !> CSYCONVF: converts the factorization output format used in + !> CSYTRF provided on entry in parameter A into the factorization + !> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in CSYTRF into + !> the format used in CSYTRF_RK (or CSYTRF_BK). + !> If parameter WAY = 'R': + !> CSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in CSYTRF_RK + !> (or CSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in CSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in CSYTRF_RK + !> (or CSYTRF_BK) into the format used in CSYTRF. + !> CSYCONVF can also convert in Hermitian matrix case, i.e. between + !> formats used in CHETRF and CHETRF_RK (or CHETRF_BK). + + pure subroutine stdlib_csyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_cswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_cswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_cswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_cswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is cone interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_csyconvf + + !> If parameter WAY = 'C': + !> CSYCONVF_ROOK: converts the factorization output format used in + !> CSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for CSYTRF_ROOK and + !> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> CSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in CSYTRF_RK + !> (or CSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in CSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for CSYTRF_ROOK and + !> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. + !> CSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between + !> formats used in CHETRF_ROOK and CHETRF_RK (or CHETRF_BK). + + pure subroutine stdlib_csyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_cswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_cswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_cswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_cswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_cswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_cswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_csyconvf_rook + + !> CSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_csyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(sp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,int,log,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'CSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_sp / s( j ) + end do + tol = one / sqrt( 2.0_sp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + real( s( i )*work( i ),KIND=sp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_classq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = real( n-2,KIND=sp) * ( real( work( i ),KIND=sp) - t*si ) + c0 = -(t*si)*si + 2 * real( work( i ),KIND=sp) * si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + real( work( i ),KIND=sp) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_slamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_slamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_csyequb + + !> CSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_csymv( uplo, n, alpha, a, lda, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, lda, n + complex(sp), intent(in) :: alpha, beta + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), x(*) + complex(sp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + complex(sp) :: temp1, temp2 + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( lda0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when a is stored in upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + do i = 1, j - 1 + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + temp1*a( j, j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do i = 1, j - 1 + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*a( j, j ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*a( j, j ) + do i = j + 1, n + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*a( j, j ) + ix = jx + iy = jy + do i = j + 1, n + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_csymv + + !> CSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_csyr( uplo, n, alpha, x, incx, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, lda, n + complex(sp), intent(in) :: alpha + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, kx + complex(sp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + else if( lda CSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_csyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(sp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_cswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_cswap ( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_csyswapr + + !> CSYTF2: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_csytf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(sp) :: absakk, alpha, colmax, rowmax + complex(sp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero .or. stdlib_sisnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = imax + stdlib_icamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + call stdlib_cswap( kk-kp-1, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / a( k, k ) + call stdlib_csyr( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_cscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*a( j, k-1 )-a( j, k ) ) + wk = d12*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*wk -a( i, k-1 )*wkm1 + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_icamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp CSYTF2_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_csytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin + complex(sp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + stemp = cabs1( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )1 )call stdlib_cswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_cswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_cswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_csyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_cscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_csyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )(k+1) )call stdlib_cswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_cswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_csyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_cscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_csyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k CSYTF2_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_csytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin + complex(sp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_icamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_icamax( imax-1, a( 1, imax ), 1 ) + stemp = cabs1( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))1 )call stdlib_cswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_cswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_cswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_cswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_csyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_cscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_csyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))(k+1) )call stdlib_cswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_cswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_csyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_cscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_csyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k CSYTRF: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_csytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clasyf( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_csytf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_clasyf; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clasyf( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_csytf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_csytrf + + !> CSYTRF_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_csytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_csytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_csytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_cswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_csytrf_rk + + !> CSYTRF_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_csytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_csytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_clasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_csytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_csytrf_rook + + !> CSYTRI: computes the inverse of a complex symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> CSYTRF. + + pure subroutine stdlib_csytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_cdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_cdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_cdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_ccopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_cdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_cswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k CSYTRI_ROOK: computes the inverse of a complex symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by CSYTRF_ROOK. + + pure subroutine stdlib_csytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_cdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_ccopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_cdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_cdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_ccopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_csymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_cdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_cswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_cswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_cswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_cswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k CSYTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF. + + pure subroutine stdlib_csytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_cscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,a( 1, k+1 ), 1, cone, b(& + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CSYTRS2: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF and converted by CSYCONV. + + pure subroutine stdlib_csytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_ctrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_cscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_ctrsm('L','U','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_cswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ctrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_cscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_ctrsm('L','L','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_csyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_csytrs2 + + !> CSYTRS_3: solves a system of linear equations A * X = B with a complex + !> symmetric matrix A using the factorization computed + !> by CSYTRF_RK or CSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_csytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), e(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_ctrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_cscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'U', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_cscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_ctrsm('L', 'L', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_csytrs_3 + + !> CSYTRS_AA: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by CSYTRF_AA. + + pure subroutine stdlib_csytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_ctrsm( 'L', 'U', 'T', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_clacpy( 'F', 1, n, a( 1, 1 ), lda+1, work( n ), 1) + if( n>1 ) then + call stdlib_clacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_clacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_cgtsv( n, nrhs, work( 1 ), work( n ), work( 2*n ), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (u**t \ (t \ (u \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_clacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_clacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_clacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_cgtsv( n, nrhs, work( 1 ), work(n), work( 2*n ), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute (l**t \ b) -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'L', 'T', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_csytrs_aa + + !> CSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a complex symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by CSYTRF_ROOK. + + pure subroutine stdlib_csytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_cscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_cgeru( k-2, nrhs,-cone, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_cgeru( k-2, nrhs,-cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 )& + , ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, & + cone, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, cone, & + b( k, 1 ), ldb ) + call stdlib_cgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 ), 1, cone,& + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_cswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CTBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by CTBTRS or some other + !> means before entering this routine. CTBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ctbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ctbsv( uplo, transt, diag, n, kd, ab, ldab, work,1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ctbsv( uplo, transn, diag, n, kd, ab, ldab, work,1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ctbrfs + + !> CTBTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by-NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_ctbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> CTFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**H. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_ctfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + complex(sp), intent(in) :: alpha + ! Array Arguments + complex(sp), intent(in) :: a(0:*) + complex(sp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb CTFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_ctfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(out) :: ap(0:*) + complex(sp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg + ! Intrinsic Functions + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = conjg( arf( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_ctfttp + + !> CTFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_ctfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(sp), intent(out) :: a(0:lda-1,0:*) + complex(sp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + a( n2+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + a( j-n1, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + a( i, n1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + a( n2+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + a( k+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - k, k - 1 + a( j-k, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + a( i, k+1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + a( k+1+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_ctfttr + + !> CTGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of complex matrices (S,P), where S and P are upper triangular. + !> Matrix pairs of this type are produced by the generalized Schur + !> factorization of a complex matrix pair (A,B): + !> A = Q*S*Z**H, B = Q*P*Z**H + !> as computed by CGGHRD + CHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal elements of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the unitary factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_ctgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: p(ldp,*), s(lds,*) + complex(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr + real(sp) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, & + safmin, sbeta, scale, small, temp, ulp, xmax + complex(sp) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )bignum*temp ) then + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = one + end if + suma = czero + sumb = czero + do jr = je, j - 1 + suma = suma + conjg( s( jr, j ) )*work( jr ) + sumb = sumb + conjg( p( jr, j ) )*work( jr ) + end do + sum = acoeff*suma - conjg( bcoeff )*sumb + ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) ) + ! with scaling and perturbation of the denominator + d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) ) + if( abs1( d )<=dmin )d = cmplx( dmin,KIND=sp) + if( abs1( d )=bignum*abs1( d ) ) then + temp = one / abs1( sum ) + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = temp*xmax + sum = temp*sum + end if + end if + work( j ) = stdlib_cladiv( -sum, d ) + xmax = max( xmax, abs1( work( j ) ) ) + end do loop_100 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_cgemv( 'N', n, n+1-je, cone, vl( 1, je ), ldvl,work( je ), 1, & + czero, work( n+1 ), 1 ) + isrc = 2 + ibeg = 1 + else + isrc = 1 + ibeg = je + end if + ! copy and scale eigenvector into column of vl + xmax = zero + do jr = ibeg, n + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = ibeg, n + vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + ibeg = n + 1 + end if + do jr = 1, ibeg - 1 + vl( jr, ieig ) = czero + end do + end if + end do loop_140 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + loop_250: do je = n, 1, -1 + if( ilall ) then + ilcomp = .true. + else + ilcomp = select( je ) + end if + if( ilcomp ) then + ieig = ieig - 1 + if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=sp) )& + <=safmin ) then + ! singular matrix pencil -- return unit eigenvector + do jr = 1, n + vr( jr, ieig ) = czero + end do + vr( ieig, ieig ) = cone + cycle loop_250 + end if + ! non-singular eigenvalue: + ! compute coefficients a and b in + ! ( a a - b b ) x = 0 + temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=sp) )& + *bscale, safmin ) + salpha = ( temp*s( je, je ) )*ascale + sbeta = ( temp*real( p( je, je ),KIND=sp) )*bscale + acoeff = sbeta*ascale + bcoeff = salpha*bscale + ! scale to avoid underflow + lsa = abs( sbeta )>=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )=bignum*abs1( d ) ) then + temp = one / abs1( work( j ) ) + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + work( j ) = stdlib_cladiv( -work( j ), d ) + if( j>1 ) then + ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling + if( abs1( work( j ) )>one ) then + temp = one / abs1( work( j ) ) + if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + ca = acoeff*work( j ) + cb = bcoeff*work( j ) + do jr = 1, j - 1 + work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j ) + end do + end if + end do loop_210 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_cgemv( 'N', n, je, cone, vr, ldvr, work, 1,czero, work( n+1 ), & + 1 ) + isrc = 2 + iend = n + else + isrc = 1 + iend = je + end if + ! copy and scale eigenvector into column of vr + xmax = zero + do jr = 1, iend + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = 1, iend + vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + iend = 0 + end if + do jr = iend + 1, n + vr( jr, ieig ) = czero + end do + end if + end do loop_250 + end if + return + end subroutine stdlib_ctgevc + + !> CTGEX2: swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) + !> in an upper triangular matrix pair (A, B) by an unitary equivalence + !> transformation. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_ctgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: twenty = 2.0e+1_sp + integer(ilp), parameter :: ldst = 2 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, m + real(sp) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb + complex(sp) :: cdum, f, g, sq, sz + ! Local Arrays + complex(sp) :: s(ldst,ldst), t(ldst,ldst), work(8) + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 )return + m = ldst + weak = .false. + strong = .false. + ! make a local copy of selected block in (a, b) + call stdlib_clacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst ) + call stdlib_clacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst ) + ! compute the threshold for testing the acceptance of swapping. + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + scale = real( czero,KIND=sp) + sum = real( cone,KIND=sp) + call stdlib_clacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_clacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_classq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=sp) + sum = real( cone,KIND=sp) + call stdlib_classq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + ! thres has been changed from + ! thresh = max( ten*eps*sa, smlnum ) + ! to + ! thresh = max( twenty*eps*sa, smlnum ) + ! on 04/01/10. + ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by + ! jim demmel and guillaume revy. see forum post 1783. + thresha = max( twenty*eps*sa, smlnum ) + threshb = max( twenty*eps*sb, smlnum ) + ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks + ! using givens rotations and perform the swap tentatively. + f = s( 2, 2 )*t( 1, 1 ) - t( 2, 2 )*s( 1, 1 ) + g = s( 2, 2 )*t( 1, 2 ) - t( 2, 2 )*s( 1, 2 ) + sa = abs( s( 2, 2 ) ) * abs( t( 1, 1 ) ) + sb = abs( s( 1, 1 ) ) * abs( t( 2, 2 ) ) + call stdlib_clartg( g, f, cz, sz, cdum ) + sz = -sz + call stdlib_crot( 2, s( 1, 1 ), 1, s( 1, 2 ), 1, cz, conjg( sz ) ) + call stdlib_crot( 2, t( 1, 1 ), 1, t( 1, 2 ), 1, cz, conjg( sz ) ) + if( sa>=sb ) then + call stdlib_clartg( s( 1, 1 ), s( 2, 1 ), cq, sq, cdum ) + else + call stdlib_clartg( t( 1, 1 ), t( 2, 1 ), cq, sq, cdum ) + end if + call stdlib_crot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, cq, sq ) + call stdlib_crot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, cq, sq ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) )<=thresha .and.abs( t( 2, 1 ) )<=threshb + if( .not.weak )go to 20 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr, b-ql**h*t*qr)) <= o(eps*f-norm((a, b))) + call stdlib_clacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_clacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_crot( 2, work, 1, work( 3 ), 1, cz, -conjg( sz ) ) + call stdlib_crot( 2, work( 5 ), 1, work( 7 ), 1, cz, -conjg( sz ) ) + call stdlib_crot( 2, work, 2, work( 2 ), 2, cq, -sq ) + call stdlib_crot( 2, work( 5 ), 2, work( 6 ), 2, cq, -sq ) + do i = 1, 2 + work( i ) = work( i ) - a( j1+i-1, j1 ) + work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 ) + work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 ) + work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 ) + end do + scale = real( czero,KIND=sp) + sum = real( cone,KIND=sp) + call stdlib_classq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=sp) + sum = real( cone,KIND=sp) + call stdlib_classq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 20 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! equivalence transformations to the original matrix pair (a,b) + call stdlib_crot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, cz, conjg( sz ) ) + call stdlib_crot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, cz, conjg( sz ) ) + call stdlib_crot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq ) + call stdlib_crot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq ) + ! set n1 by n2 (2,1) blocks to 0 + a( j1+1, j1 ) = czero + b( j1+1, j1 ) = czero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_crot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, cz, conjg( sz ) ) + + if( wantq )call stdlib_crot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, cq, conjg( sq ) ) + + ! exit with info = 0 if swap was successfully performed. + return + ! exit with info = 1 if swap was rejected. + 20 continue + info = 1 + return + end subroutine stdlib_ctgex2 + + !> CTGEXC: reorders the generalized Schur decomposition of a complex + !> matrix pair (A,B), using an unitary equivalence transformation + !> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with + !> row index IFST is moved to row ILST. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_ctgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ifst, lda, ldb, ldq, ldz, n + integer(ilp), intent(inout) :: ilst + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: here + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTGEXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + if( ifst==ilst )return + if( ifst=ilst )go to 20 + here = here + 1 + end if + ilst = here + return + end subroutine stdlib_ctgexc + + !> CTPLQT2: computes a LQ a factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_ctplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda CTPQRT2: computes a QR factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_ctpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda CTPRFB: applies a complex "triangular-pentagonal" block reflector H or its + !> conjugate transpose H**H to a complex matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_ctprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(in) :: t(ldt,*), v(ldv,*) + complex(sp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_cgemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork ) + + call stdlib_cgemm( 'C', 'N', k-l, n, m, cone, v( 1, kp ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_cgemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_ctrmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_ctrmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_cgemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork ) + + call stdlib_cgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1, kp ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_cgemm( 'N', 'C', m, l, k-l, -cone, work( 1, kp ), ldwork,v( np, kp ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_ctrmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_cgemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_cgemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1 ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_cgemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_ctrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h**h where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_cgemm( 'N', 'N', m, l, n-l, cone, b( 1, np ), ldb,v( np, kp ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_cgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1 ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_cgemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_ctrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, mp ), ldv,work, ldb ) + + call stdlib_cgemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork ) + + call stdlib_cgemm( 'N', 'N', k-l, n, m, cone, v( kp, 1 ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_cgemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_ctrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_ctrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + call stdlib_cgemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork ) + + call stdlib_cgemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1 ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_cgemm( 'N', 'N', m, l, k-l, -cone, work( 1, kp ), ldwork,v( kp, np ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_ctrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_cgemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_cgemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'C', 'N', m-l, n, k, -cone, v( 1, mp ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_cgemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_ctrmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_cgemm( 'N', 'C', m, l, n-l, cone, b( 1, np ), ldb,v( kp, np ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_cgemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ctrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_cgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1, np ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_cgemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_ctrmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_ctprfb + + !> CTPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by CTPTRS or some other + !> means before entering this routine. CTPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ctprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, kc, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ctpsv( uplo, transt, diag, n, ap, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ctpsv( uplo, transn, diag, n, ap, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ctprfs + + !> CTPTRI: computes the inverse of a complex upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_ctptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + complex(sp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==czero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==czero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = cone / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -cone + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_ctpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_cscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = cone / ap( jc ) + ajj = -ap( jc ) + else + ajj = -cone + end if + if( j CTPTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_ctptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb CTPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_ctpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(in) :: ap(0:*) + complex(sp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = conjg( ap( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_ctpttf + + !> CTPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_ctpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(sp), intent(out) :: a(lda,*) + complex(sp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CTREVC: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + + pure subroutine stdlib_ctrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + complex(sp), parameter :: cmzero = (0.0e+0_sp,0.0e+0_sp) + complex(sp), parameter :: cmone = (1.0e+0_sp,0.0e+0_sp) + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki + real(sp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt1 ) then + call stdlib_clatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 ), scale, rwork,info ) + work( ki ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_ccopy( ki, work( 1 ), 1, vr( 1, is ), 1 ) + ii = stdlib_icamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_csscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = cmzero + end do + else + if( ki>1 )call stdlib_cgemv( 'N', n, ki-1, cmone, vr, ldvr, work( 1 ),1, & + cmplx( scale,KIND=sp), vr( 1, ki ), 1 ) + ii = stdlib_icamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_csscal( n, remax, vr( 1, ki ), 1 ) + end if + ! set back the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k+n ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! compute left eigenvectors. + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + work( n ) = cmone + ! form right-hand side. + do k = ki + 1, n + work( k ) = -conjg( t( ki, k ) ) + end do + ! solve the triangular system: + ! (t(ki+1:n,ki+1:n) - t(ki,ki))**h*x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) CTREVC3: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_ctrevc3( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, lwork, rwork, lrwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, lrwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki, iv, maxwrk, nb + real(sp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(sp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + nb = stdlib_ilaenv( 1, 'CTREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + rwork(1) = n + lquery = ( lwork==-1 .or. lrwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_claset( 'F', n, 1+2*nb, czero, czero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_slabad( unfl, ovfl ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + ! store the diagonal elements of t in working array work. + do i = 1, n + work( i ) = t( i, i ) + end do + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + rwork( 1 ) = zero + do j = 2, n + rwork( j ) = stdlib_scasum( j-1, t( 1, j ), 1 ) + end do + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=nb=1; + ! blocked version starts with iv=nb, goes down to 1. + ! (note the "0-th" column is used to store the original diagonal.) + iv = nb + is = m + loop_80: do ki = n, 1, -1 + if( somev ) then + if( .not.select( ki ) )cycle loop_80 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex right eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper triangular system: + ! [ t(1:ki-1,1:ki-1) - t(ki,ki) ]*x = scale*work. + do k = 1, ki - 1 + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) )1 ) then + call stdlib_clatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 + iv*n ), scale,rwork, info ) + work( ki + iv*n ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_ccopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_icamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_csscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = czero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_cgemv( 'N', n, ki-1, cone, vr, ldvr,work( 1 + iv*n ), 1,& + cmplx( scale,KIND=sp),vr( 1, ki ), 1 ) + ii = stdlib_icamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_csscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = czero + end do + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb, + ! or if this was last vector, do the gemm + if( (iv==1) .or. (ki==1) ) then + call stdlib_cgemm( 'N', 'N', n, nb-iv+1, ki+nb-iv, cone,vr, ldvr,work( 1 + & + (iv)*n ), n,czero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + ii = stdlib_icamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / cabs1( work( ii + (nb+k)*n ) ) + call stdlib_csscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_clacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if + ! restore the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb. + ! (note the "0-th" column is used to store the original diagonal.) + iv = 1 + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex left eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -conjg( t( ki, k ) ) + end do + ! solve conjugate-transposed triangular system: + ! [ t(ki+1:n,ki+1:n) - t(ki,ki) ]**h * x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) CTREXC: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that the diagonal element of T with row index IFST + !> is moved to row ILST. + !> The Schur form T is reordered by a unitary similarity transformation + !> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + !> postmultplying it with Z. + + pure subroutine stdlib_ctrexc( compq, n, t, ldt, q, ldq, ifst, ilst, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(in) :: ifst, ilst, ldq, ldt, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: q(ldq,*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: k, m1, m2, m3 + real(sp) :: cs + complex(sp) :: sn, t11, t22, temp + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 .or. ifst==ilst )return + if( ifst CTRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by CTRTRS or some other + !> means before entering this routine. CTRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ctrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ctrsv( uplo, transt, diag, n, a, lda, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ctrsv( uplo, transn, diag, n, a, lda, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ctrrfs + + !> CTRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a complex upper triangular + !> matrix T (or of any matrix Q*T*Q**H with Q unitary). + + pure subroutine stdlib_ctrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm,& + m, work, ldwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(sp), intent(out) :: rwork(*), s(*), sep(*) + complex(sp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: somcon, wantbh, wants, wantsp + character :: normin + integer(ilp) :: i, ierr, ix, j, k, kase, ks + real(sp) :: bignum, eps, est, lnrm, rnrm, scale, smlnum, xnorm + complex(sp) :: cdum, prod + ! Local Arrays + integer(ilp) :: isave(3) + complex(sp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + ! set m to the number of eigenpairs for which condition numbers are + ! to be computed. + if( somcon ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt CTRTI2: computes the inverse of a complex upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_ctrti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + complex(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda CTRTRI: computes the inverse of a complex upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_ctrtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_ctrti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_ctrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, cone, a, & + lda, a( 1, j ), lda ) + call stdlib_ctrsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -cone, a( & + j, j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_ctrti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_ctrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, & + cone, a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_ctrsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + cone, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_ctrti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_ctrtri + + !> CTRTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_ctrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda CTRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_ctrttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(sp), intent(in) :: a(0:lda-1,0:*) + complex(sp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + arf( ij ) = conjg( a( n2+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + arf( ij ) = conjg( a( j-n1, l ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + arf( ij ) = a( i, n1+j ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda=n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + arf( ij ) = conjg( a( n2+j, l ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + arf( ij ) = conjg( a( k+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - k, k - 1 + arf( ij ) = conjg( a( j-k, l ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + arf( ij ) = a( i, k+1+j ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + arf( ij ) = conjg( a( k+1+j, l ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_ctrttf + + !> CTRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_ctrttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CTZRZF: reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A + !> to upper triangular form by means of unitary transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N unitary matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_ctzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_clarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_clatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_ctzrzf + + !> CUNBDB: simultaneously bidiagonalizes the blocks of an M-by-M + !> partitioned unitary matrix X: + !> [ B11 | B12 0 0 ] + !> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H + !> X = [-----------] = [---------] [----------------] [---------] . + !> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] + !> [ 0 | 0 0 I ] + !> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is + !> not the case, then X must be transposed and/or permuted. This can be + !> done in constant time using the TRANS and SIGNS options. See CUNCSD + !> for details.) + !> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- + !> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are + !> represented implicitly by Householder vectors. + !> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! ==================================================================== + ! Parameters + real(sp), parameter :: realone = 1.0_sp + + + ! Local Scalars + logical(lk) :: colmajor, lquery + integer(ilp) :: i, lworkmin, lworkopt + real(sp) :: z1, z2, z3, z4 + ! Intrinsic Functions + intrinsic :: atan2,cos,max,min,sin + intrinsic :: cmplx,conjg + ! Executable Statements + ! test input arguments + info = 0 + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( .not. stdlib_lsame( signs, 'O' ) ) then + z1 = realone + z2 = realone + z3 = realone + z4 = realone + else + z1 = realone + z2 = -realone + z3 = realone + z4 = -realone + end if + lquery = lwork == -1 + if( m < 0 ) then + info = -3 + else if( p < 0 .or. p > m ) then + info = -4 + else if( q < 0 .or. q > p .or. q > m-p .or.q > m-q ) then + info = -5 + else if( colmajor .and. ldx11 < max( 1, p ) ) then + info = -7 + else if( .not.colmajor .and. ldx11 < max( 1, q ) ) then + info = -7 + else if( colmajor .and. ldx12 < max( 1, p ) ) then + info = -9 + else if( .not.colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -9 + else if( colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -11 + else if( .not.colmajor .and. ldx21 < max( 1, q ) ) then + info = -11 + else if( colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -13 + else if( .not.colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -13 + end if + ! compute workspace + if( info == 0 ) then + lworkopt = m - q + lworkmin = m - q + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'XORBDB', -info ) + return + else if( lquery ) then + return + end if + ! handle column-major and row-major separately + if( colmajor ) then + ! reduce columns 1, ..., q of x11, x12, x21, and x22 + do i = 1, q + if( i == 1 ) then + call stdlib_cscal( p-i+1, cmplx( z1, 0.0_sp,KIND=sp), x11(i,i), 1 ) + else + call stdlib_cscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_sp,KIND=sp),x11(i,i), & + 1 ) + call stdlib_caxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), x12(& + i,i-1), 1, x11(i,i), 1 ) + end if + if( i == 1 ) then + call stdlib_cscal( m-p-i+1, cmplx( z2, 0.0_sp,KIND=sp), x21(i,i), 1 ) + else + call stdlib_cscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_sp,KIND=sp),x21(i,i),& + 1 ) + call stdlib_caxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), & + x22(i,i-1), 1, x21(i,i), 1 ) + end if + theta(i) = atan2( stdlib_scnrm2( m-p-i+1, x21(i,i), 1 ),stdlib_scnrm2( p-i+1, & + x11(i,i), 1 ) ) + if( p > i ) then + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + else if ( p == i ) then + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i,i), 1, taup1(i) ) + end if + x11(i,i) = cone + if ( m-p > i ) then + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1,taup2(i) ) + else if ( m-p == i ) then + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i,i), 1,taup2(i) ) + end if + x21(i,i) = cone + if ( q > i ) then + call stdlib_clarf( 'L', p-i+1, q-i, x11(i,i), 1,conjg(taup1(i)), x11(i,i+1), & + ldx11, work ) + call stdlib_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1,conjg(taup2(i)), x21(i,i+1),& + ldx21, work ) + end if + if ( m-q+1 > i ) then + call stdlib_clarf( 'L', p-i+1, m-q-i+1, x11(i,i), 1,conjg(taup1(i)), x12(i,i),& + ldx12, work ) + call stdlib_clarf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1,conjg(taup2(i)), x22(i,& + i), ldx22, work ) + end if + if( i < q ) then + call stdlib_cscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_sp,KIND=sp),x11(i,i+& + 1), ldx11 ) + call stdlib_caxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_sp,KIND=sp),x21(i,i+1)& + , ldx21, x11(i,i+1), ldx11 ) + end if + call stdlib_cscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_sp,KIND=sp),x12(i,i)& + , ldx12 ) + call stdlib_caxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_sp,KIND=sp),x22(i,i),& + ldx22, x12(i,i), ldx12 ) + if( i < q )phi(i) = atan2( stdlib_scnrm2( q-i, x11(i,i+1), ldx11 ),stdlib_scnrm2(& + m-q-i+1, x12(i,i), ldx12 ) ) + if( i < q ) then + call stdlib_clacgv( q-i, x11(i,i+1), ldx11 ) + if ( i == q-1 ) then + call stdlib_clarfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) + else + call stdlib_clarfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) + end if + x11(i,i+1) = cone + end if + if ( m-q+1 > i ) then + call stdlib_clacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( m-q == i ) then + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + + end if + end if + x12(i,i) = cone + if( i < q ) then + call stdlib_clarf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & + ldx11, work ) + call stdlib_clarf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & + ldx21, work ) + end if + if ( p > i ) then + call stdlib_clarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if ( m-p > i ) then + call stdlib_clarf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & + ldx22, work ) + end if + if( i < q )call stdlib_clacgv( q-i, x11(i,i+1), ldx11 ) + call stdlib_clacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_cscal( m-q-i+1, cmplx( -z1*z4, 0.0_sp,KIND=sp), x12(i,i),ldx12 ) + + call stdlib_clacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( i >= m-q ) then + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + end if + x12(i,i) = cone + if ( p > i ) then + call stdlib_clarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_clarf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& + x22(q+1,i), ldx22, work ) + call stdlib_clacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_cscal( m-p-q-i+1, cmplx( z2*z4, 0.0_sp,KIND=sp),x22(q+i,p+i), ldx22 ) + + call stdlib_clacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + call stdlib_clarfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) + + x22(q+i,p+i) = cone + call stdlib_clarf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& + q+i+1,p+i), ldx22, work ) + call stdlib_clacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + end do + else + ! reduce columns 1, ..., q of x11, x12, x21, x22 + do i = 1, q + if( i == 1 ) then + call stdlib_cscal( p-i+1, cmplx( z1, 0.0_sp,KIND=sp), x11(i,i),ldx11 ) + else + call stdlib_cscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_sp,KIND=sp),x11(i,i), & + ldx11 ) + call stdlib_caxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), x12(& + i-1,i), ldx12, x11(i,i), ldx11 ) + end if + if( i == 1 ) then + call stdlib_cscal( m-p-i+1, cmplx( z2, 0.0_sp,KIND=sp), x21(i,i),ldx21 ) + + else + call stdlib_cscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_sp,KIND=sp),x21(i,i),& + ldx21 ) + call stdlib_caxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), & + x22(i-1,i), ldx22, x21(i,i), ldx21 ) + end if + theta(i) = atan2( stdlib_scnrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib_scnrm2( p-i+1,& + x11(i,i), ldx11 ) ) + call stdlib_clacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_clacgv( m-p-i+1, x21(i,i), ldx21 ) + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) + x11(i,i) = cone + if ( i == m-p ) then + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) + else + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) + end if + x21(i,i) = cone + call stdlib_clarf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & + work ) + call stdlib_clarf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & + ldx12, work ) + call stdlib_clarf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & + ldx21, work ) + call stdlib_clarf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & + ldx22, work ) + call stdlib_clacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_clacgv( m-p-i+1, x21(i,i), ldx21 ) + if( i < q ) then + call stdlib_cscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_sp,KIND=sp),x11(i+1,& + i), 1 ) + call stdlib_caxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_sp,KIND=sp),x21(i+1,i)& + , 1, x11(i+1,i), 1 ) + end if + call stdlib_cscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_sp,KIND=sp),x12(i,i)& + , 1 ) + call stdlib_caxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_sp,KIND=sp),x22(i,i),& + 1, x12(i,i), 1 ) + if( i < q )phi(i) = atan2( stdlib_scnrm2( q-i, x11(i+1,i), 1 ),stdlib_scnrm2( m-& + q-i+1, x12(i,i), 1 ) ) + if( i < q ) then + call stdlib_clarfgp( q-i, x11(i+1,i), x11(i+2,i), 1, tauq1(i) ) + x11(i+1,i) = cone + end if + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if( i < q ) then + call stdlib_clarf( 'L', q-i, p-i, x11(i+1,i), 1,conjg(tauq1(i)), x11(i+1,i+1),& + ldx11, work ) + call stdlib_clarf( 'L', q-i, m-p-i, x11(i+1,i), 1,conjg(tauq1(i)), x21(i+1,i+& + 1), ldx21, work ) + end if + call stdlib_clarf( 'L', m-q-i+1, p-i, x12(i,i), 1, conjg(tauq2(i)),x12(i,i+1), & + ldx12, work ) + if ( m-p > i ) then + call stdlib_clarf( 'L', m-q-i+1, m-p-i, x12(i,i), 1,conjg(tauq2(i)), x22(i,i+& + 1), ldx22, work ) + end if + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_cscal( m-q-i+1, cmplx( -z1*z4, 0.0_sp,KIND=sp), x12(i,i), 1 ) + call stdlib_clarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if ( p > i ) then + call stdlib_clarf( 'L', m-q-i+1, p-i, x12(i,i), 1,conjg(tauq2(i)), x12(i,i+1),& + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_clarf( 'L', m-q-i+1, m-p-q, x12(i,i), 1,conjg(tauq2(& + i)), x22(i,q+1), ldx22, work ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_cscal( m-p-q-i+1, cmplx( z2*z4, 0.0_sp,KIND=sp),x22(p+i,q+i), 1 ) + + call stdlib_clarfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1,tauq2(p+i) ) + + x22(p+i,q+i) = cone + if ( m-p-q /= i ) then + call stdlib_clarf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1,conjg(tauq2(p+i)),& + x22(p+i,q+i+1), ldx22,work ) + end if + end do + end if + return + end subroutine stdlib_cunbdb + + !> CUNBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + + pure subroutine stdlib_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: alphasq = 0.01_sp + real(sp), parameter :: realone = 1.0_sp + real(sp), parameter :: realzero = 0.0_sp + + + ! Local Scalars + integer(ilp) :: i + real(sp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB6', -info ) + return + end if + ! first, project x onto the orthogonal complement of q's column + ! space + scl1 = realzero + ssq1 = realone + call stdlib_classq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_classq( m2, x2, incx2, scl2, ssq2 ) + normsq1 = scl1**2*ssq1 + scl2**2*ssq2 + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_cgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_cgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_cgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_cgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_classq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_classq( m2, x2, incx2, scl2, ssq2 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if projection is sufficiently large in norm, then stop. + ! if projection is czero, then stop. + ! otherwise, project again. + if( normsq2 >= alphasq*normsq1 ) then + return + end if + if( normsq2 == czero ) then + return + end if + normsq1 = normsq2 + do i = 1, n + work(i) = czero + end do + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_cgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_cgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_cgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_cgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_classq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_classq( m1, x1, incx1, scl1, ssq1 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if second projection is sufficiently large in norm, then do + ! nothing more. alternatively, if it shrunk significantly, then + ! truncate it to czero. + if( normsq2 < alphasq*normsq1 ) then + do i = 1, m1 + x1(i) = czero + end do + do i = 1, m2 + x2(i) = czero + end do + end if + return + end subroutine stdlib_cunbdb6 + + !> CUNG2L: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. + + pure subroutine stdlib_cung2l( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda CUNG2R: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. + + pure subroutine stdlib_cung2r( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda CUNGL2: generates an m-by-n complex matrix Q with orthonormal rows, + !> which is defined as the first m rows of a product of k elementary + !> reflectors of order n + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. + + pure subroutine stdlib_cungl2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldak .and. j<=m )a( j, j ) = cone + end do + end if + do i = k, 1, -1 + ! apply h(i)**h to a(i:m,i:n) from the right + if( i CUNGLQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. + + pure subroutine stdlib_cunglq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'CUNGLQ', ' ', m, n, k, -1 ) + lwkopt = max( 1, m )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=m ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_clarft( 'FORWARD', 'ROWWISE', n-i+1, ib, a( i, i ),lda, tau( i ), & + work, ldwork ) + ! apply h**h to a(i+ib:m,i:n) from the right + call stdlib_clarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'FORWARD','ROWWISE', m-i-& + ib+1, n-i+1, ib, a( i, i ),lda, work, ldwork, a( i+ib, i ), lda,work( ib+1 ), & + ldwork ) + end if + ! apply h**h to columns i:n of current block + call stdlib_cungl2( ib, n-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set columns 1:i-1 of current block to czero + do j = 1, i - 1 + do l = i, i + ib - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_cunglq + + !> CUNGQL: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. + + pure subroutine stdlib_cungql( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + if( n-k+i>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_clarfb( 'LEFT', 'NO TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-& + 1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + ! apply h to rows 1:m-k+i+ib-1 of current block + call stdlib_cung2l( m-k+i+ib-1, ib, ib, a( 1, n-k+i ), lda,tau( i ), work, iinfo & + ) + ! set rows m-k+i+ib:m of current block to czero + do j = n - k + i, n - k + i + ib - 1 + do l = m - k + i + ib, m + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_cungql + + !> CUNGQR: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. + + pure subroutine stdlib_cungqr( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'CUNGQR', ' ', m, n, k, -1 ) + lwkopt = max( 1, n )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=n ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i & + ), work, ldwork ) + ! apply h to a(i:m,i+ib:n) from the left + call stdlib_clarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-& + i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), & + ldwork ) + end if + ! apply h to rows i:m of current block + call stdlib_cung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set rows 1:i-1 of current block to czero + do j = i, i + ib - 1 + do l = 1, i - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_cungqr + + !> CUNGR2: generates an m by n complex matrix Q with orthonormal rows, + !> which is defined as the last m rows of a product of k elementary + !> reflectors of order n + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. + + pure subroutine stdlib_cungr2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldan-m .and. j<=n-k )a( m-n+j, j ) = cone + end do + end if + do i = 1, k + ii = m - k + i + ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right + call stdlib_clacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone + call stdlib_clarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1 ), lda,conjg( tau( i ) ), a, lda,& + work ) + call stdlib_cscal( n-m+ii-1, -tau( i ), a( ii, 1 ), lda ) + call stdlib_clacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone - conjg( tau( i ) ) + ! set a(m-k+i,n-k+i+1:n) to czero + do l = n - m + ii + 1, n + a( ii, l ) = czero + end do + end do + return + end subroutine stdlib_cungr2 + + !> CUNGRQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. + + pure subroutine stdlib_cungrq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + ii = m - k + i + if( ii>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_clarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-& + 1, n-k+i+ib-1, ib, a( ii, 1 ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork ) + + end if + ! apply h**h to columns 1:n-k+i+ib-1 of current block + call stdlib_cungr2( ib, n-k+i+ib-1, ib, a( ii, 1 ), lda, tau( i ),work, iinfo ) + + ! set columns n-k+i+ib:n of current block to czero + do l = n - k + i + ib, n + do j = ii, ii + ib - 1 + a( j, l ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_cungrq + + !> CUNGTSQR_ROW: generates an M-by-N complex matrix Q_out with + !> orthonormal columns from the output of CLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by CLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of CLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine CLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which CLATSQR generates the output blocks. + + pure subroutine stdlib_cungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, & + num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1 + ! Local Arrays + complex(sp) :: dummy(1,1) + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m=m, then the loop is never executed. + if ( mb=m, then we have only one row block of a of size m + ! and we work on the entire matrix a. + mb1 = min( mb, m ) + ! apply column blocks of h in the top row block from right to left. + ! kb is the column index of the current block reflector in + ! the matrices t and v. + do kb = kb_last, 1, -nblocal + ! determine the size of the current column block knb in + ! the matrices t and v. + knb = min( nblocal, n - kb + 1 ) + if( mb1-kb-knb+1==0 ) then + ! in stdlib_slarfb_gett parameters, when m=0, then the matrix b + ! does not exist, hence we need to pass a dummy array + ! reference dummy(1,1) to b with lddummy=1. + call stdlib_clarfb_gett( 'N', 0, n-kb+1, knb,t( 1, kb ), ldt, a( kb, kb ), lda,& + dummy( 1, 1 ), 1, work, knb ) + else + call stdlib_clarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1, kb ), ldt, a( kb, & + kb ), lda,a( kb+knb, kb), lda, work, knb ) + end if + end do + work( 1 ) = cmplx( lworkopt,KIND=sp) + return + end subroutine stdlib_cungtsqr_row + + + pure subroutine stdlib_cunm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: m, n, n1, n2, ldq, ldc, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(in) :: q(ldq,*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q; + ! nw is the minimum dimension of work. + if( left ) then + nq = m + else + nq = n + end if + nw = nq + if( n1==0 .or. n2==0 ) nw = 1 + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( n1<0 .or. n1+n2/=nq ) then + info = -5 + else if( n2<0 ) then + info = -6 + else if( ldq CUNM2L: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_cunm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(sp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda CUNM2R: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(sp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda CUNML2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_cunml2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(sp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda CUNMLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_cunml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_clarft( 'FORWARD', 'ROWWISE', nq-i+1, ib, a( i, i ),lda, tau( i ), & + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_clarfb( side, transt, 'FORWARD', 'ROWWISE', mi, ni, ib,a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmlq + + !> CUNMQL: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_cunm2l( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'COLUMNWISE', nq-k+i+ib-1, ib,a( 1, i ), lda, & + tau( i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_clarfb( side, trans, 'BACKWARD', 'COLUMNWISE', mi, ni,ib, a( 1, i ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmql + + !> CUNMQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_clarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),& + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_clarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmqr + + !> CUNMR2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(sp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda CUNMR3: overwrites the general complex m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CTZRZF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, m, n + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), tau(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq + complex(sp) :: taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda CUNMRQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_cunmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1 ), lda, tau( & + i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_clarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1 ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmrq + + !> CUNMRZ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_cunmrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, & + nbmin, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_cunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, iinfo ) + + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + ja = m - l + 1 + else + mi = m + ic = 1 + ja = n - l + 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarzt( 'BACKWARD', 'ROWWISE', l, ib, a( i, ja ), lda,tau( i ), work(& + iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_clarzb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, l, a( i, ja )& + , lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmrz + + !> CBBCSD: computes the CS decomposition of a unitary matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See CUNCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The unitary matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & + lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q + ! Array Arguments + real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), rwork(*) + real(sp), intent(inout) :: phi(*), theta(*) + complex(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(sp), parameter :: hundred = 100.0_sp + real(sp), parameter :: meighth = -0.125_sp + real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini + real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lrwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lrworkmin = 1 + rwork(1) = lrworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lrworkopt = iv2tsn + q - 1 + lrworkmin = lrworkopt + rwork(1) = lrworkopt + if( lrwork < lrworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_slamch( 'EPSILON' ) + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_sp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_slartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + else + call stdlib_slartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + end if + temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) + + b11d(imin) = temp + b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) + temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) + + b21d(imin) = temp + b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_slartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& + 1), r ) + else if( mu <= nu ) then + call stdlib_slartgs( b11e( imin ), b11d( imin + 1 ), mu,rwork(iu1cs+imin-1), & + rwork(iu1sn+imin-1) ) + else + call stdlib_slartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_slartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& + 1), r ) + else if( nu < mu ) then + call stdlib_slartgs( b21e( imin ), b21d( imin + 1 ), nu,rwork(iu2cs+imin-1), & + rwork(iu2sn+imin-1) ) + else + call stdlib_slartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& + imin-1) ) + end if + rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) + rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) + temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) + end if + temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) + b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) + temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) + end if + temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) + b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_slartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_slartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_slartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( mu <= nu ) then + call stdlib_slartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + else + call stdlib_slartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + end if + rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) + rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_slartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( nu < mu ) then + call stdlib_slartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + else + call stdlib_slartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + end if + temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) + b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) + temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) + b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) + temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) + b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) + b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) + temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) + b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) + b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_slartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_slartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_slartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_slartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& + ) + else + call stdlib_slartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_slartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & + r ) + else if( nu < mu ) then + call stdlib_slartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& + ) + else + call stdlib_slartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) + + end if + rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) + rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) + temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = rwork(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) + end if + temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = rwork(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) + end if + temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) + b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = rwork(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) + temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) + b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = rwork(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_slartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_slartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + else + call stdlib_slartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + end if + temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) + + b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) + + b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_clasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_clasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_clasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_clasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_clasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_clasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_clasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_clasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_cscal( q, cnegone, v1t(imax,1), ldv1t ) + else + call stdlib_cscal( q, cnegone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_cscal( p, cnegone, u1(1,imax), 1 ) + else + call stdlib_cscal( p, cnegone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_cscal( m-p, cnegone, u2(1,imax), 1 ) + else + call stdlib_cscal( m-p, cnegone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_cscal( m-q, cnegone, v2t(imax,1), ldv2t ) + else + call stdlib_cscal( m-q, cnegone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_cswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_cswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_cswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_cswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_cswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_cswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_cswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_cswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_cbbcsd + + !> CBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**H + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**H*VT instead of + !> P**H, for given complex input matrices U and VT. When U and VT are + !> the unitary matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by CGEBRD, then + !> A = (U*Q) * S * (P**H*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**H*C + !> for a given complex input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_cbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: c(ldc,*), u(ldu,*), vt(ldvt,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: hndrth = 0.01_sp + real(sp), parameter :: hndrd = 100.0_sp + real(sp), parameter :: meigth = -0.125_sp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, j, ll, lll, m, maxit, nm1, nm12, nm13, oldll, & + oldm + real(sp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_slasq1( n, d, e, rwork, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_slamch( 'EPSILON' ) + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + rwork( i ) = cs + rwork( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_clasr( 'R', 'V', 'F', nru, n, rwork( 1 ), rwork( n ),u, ldu ) + + if( ncc>0 )call stdlib_clasr( 'L', 'V', 'F', n, ncc, rwork( 1 ), rwork( n ),c, ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=sp) ) + thresh = max( tol*sminoa, maxitr*n*n*unfl ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*n*n*unfl ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxit = maxitr*n*n + iter = 0 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>maxit )go to 200 + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_csrot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt,cosr, & + sinr ) + if( nru>0 )call stdlib_csrot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + + if( ncc>0 )call stdlib_csrot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_slas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_slas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_slartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + rwork( i-ll+1 ) = cs + rwork( i-ll+1+nm1 ) = sn + rwork( i-ll+1+nm12 ) = oldcs + rwork( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_clasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_clasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_clasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_slartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_clasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_clasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_clasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_slartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_slartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_clasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_clasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_clasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_slartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + rwork( i-ll ) = cosr + rwork( i-ll+nm1 ) = -sinr + rwork( i-ll+nm12 ) = cosl + rwork( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_clasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_clasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_clasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_csscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_cswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_cswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_cswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_cbdsqr + + !> CGBCON: estimates the reciprocal of the condition number of a complex + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by CGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_cgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, rwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(sp) :: ainvnm, scale, smlnum + complex(sp) :: t, zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_caxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_clatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, rwork, info ) + else + ! multiply by inv(u**h). + call stdlib_clatbs( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',normin, n, kl+ku, & + ab, ldab, work, scale, rwork,info ) + ! multiply by inv(l**h). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_cdotc( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_icamax( n, work, 1 ) + if( scale CGBTRF: computes an LU factorization of a complex m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_cgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + complex(sp) :: temp + ! Local Arrays + complex(sp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_cgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! czero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = czero + end do + end do + ! czero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = czero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to czero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = czero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to czero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = czero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_icamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=czero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_cgeru( km, jm-jj, -cone, ab( kv+2, jj ), 1,ab( kv, & + jj+1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_ccopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_claswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_claswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_ctrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, cone, & + ab( kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+1, j+jb )& + , ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -cone, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+kl+1-jb, j+jb ),& + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_ctrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, cone, & + ab( kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,work13, ldwork, cone, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -cone, & + work31, ldwork, work13,ldwork, cone, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_ccopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_cgbtrf + + !> CGBTRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general band matrix A using the LU factorization computed + !> by CGBTRF. + + pure subroutine stdlib_cgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-cone modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_cswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_cgeru( lm, nrhs, -cone, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, & + 1 ), ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_ctbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b. + do i = 1, nrhs + ! solve u**t * x = b, overwriting b with x. + call stdlib_ctbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_cgemv( 'TRANSPOSE', lm, nrhs, -cone, b( j+1, 1 ),ldb, ab( kd+1, j & + ), 1, cone, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_cswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + else + ! solve a**h * x = b. + do i = 1, nrhs + ! solve u**h * x = b, overwriting b with x. + call stdlib_ctbsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT', n,kl+ku, ab, ldab,& + b( 1, i ), 1 ) + end do + ! solve l**h * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_clacgv( nrhs, b( j, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', lm, nrhs, -cone,b( j+1, 1 ), ldb, & + ab( kd+1, j ), 1, cone,b( j, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_cswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_cgbtrs + + !> CGEBD2: reduces a complex general m by n matrix A to upper or lower + !> real bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_cgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_clarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=sp) + a( i, i ) = cone + ! apply h(i)**h to a(i:m,i+1:n) from the left + if( i CGECON: estimates the reciprocal of the condition number of a general + !> complex matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by CGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_cgecon( norm, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, scale, sl, smlnum, su + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGEHD2: reduces a complex general matrix A to upper Hessenberg form H + !> by a unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_cgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda CGELQ2: computes an LQ factorization of a complex m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_cgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGELQF: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_cgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'CGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb CGELQT3: recursively computes a LQ factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_cgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_clarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + t(1,1)=conjg(t(1,1)) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_cgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = a(j1:m,1:n) q1^h [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_ctrmm( 'R', 'U', 'C', 'U', m2, m1, cone,a, lda, t( i1, 1 ), ldt ) + + call stdlib_cgemm( 'N', 'C', m2, m1, n-m1, cone, a( i1, i1 ), lda,a( 1, i1 ), lda, & + cone, t( i1, 1 ), ldt) + call stdlib_ctrmm( 'R', 'U', 'N', 'N', m2, m1, cone,t, ldt, t( i1, 1 ), ldt ) + + call stdlib_cgemm( 'N', 'N', m2, n-m1, m1, -cone, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + cone, a( i1, i1 ), lda ) + call stdlib_ctrmm( 'R', 'U', 'N', 'U', m2, m1 , cone,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )= czero + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_cgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_ctrmm( 'R', 'U', 'C', 'U', m1, m2, cone,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_cgemm( 'N', 'C', m1, m2, n-m, cone, a( 1, j1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, i1 ), ldt ) + call stdlib_ctrmm( 'L', 'U', 'N', 'N', m1, m2, -cone, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_ctrmm( 'R', 'U', 'N', 'N', m1, m2, cone,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_cgelqt3 + + !> CGEMLQT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex unitary matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by CGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_cgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + complex(sp), intent(in) :: v(ldv,*), t(ldt,*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv CGEMQRT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by CGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_cgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + complex(sp), intent(in) :: v(ldv,*), t(ldt,*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv CGEQL2: computes a QL factorization of a complex m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_cgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGEQLF: computes a QL factorization of a complex M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_cgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'BACKWARD','COLUMNWISE', m-& + k+i+ib-1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), & + ldwork ) + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_cgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_cgeqlf + + !> CGEQR2: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_cgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGEQR2P: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_cgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGEQRF: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_cgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'CGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb CGEQR2P computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_cgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'CGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb CGEQRT2: computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_cgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_clarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i CGEQRT3: recursively computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_cgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_clarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1**h + call stdlib_cgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1**h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_ctrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + call stdlib_cgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1 ), lda,a( j1, j1 ), lda, & + cone, t( 1, j1 ), ldt) + call stdlib_ctrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1, j1 ), ldt ) + + call stdlib_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + cone, a( j1, j1 ), lda ) + call stdlib_ctrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2**h + call stdlib_cgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1**h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = conjg(a( j+n1, i )) + end do + end do + call stdlib_ctrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_cgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, j1 ), ldt ) + call stdlib_ctrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_ctrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_cgeqrt3 + + !> CGERQ2: computes an RQ factorization of a complex m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_cgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CGERQF: computes an RQ factorization of a complex M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_cgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_clarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_clarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_cgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_cgerqf + + !> CGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by CGETC2. + + pure subroutine stdlib_cgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, eps, smlnum + complex(sp) :: temp + ! Intrinsic Functions + intrinsic :: abs,cmplx,real + ! Executable Statements + ! set constant to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_claswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_icamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = cmplx( one / two, zero,KIND=sp) / abs( rhs( i ) ) + call stdlib_cscal( n, temp, rhs( 1 ), 1 ) + scale = scale*real( temp,KIND=sp) + end if + do i = n, 1, -1 + temp = cmplx( one, zero,KIND=sp) / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_claswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_cgesc2 + + !> CGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_cgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: sfmin + complex(sp) :: temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_cscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_cgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_claswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_cgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_claswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_cgetrf2 + + !> CGETRI: computes the inverse of a matrix using the LU factorization + !> computed by CGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_cgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'CGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_ctrtri, then u is singular, + ! and the inverse is not computed. + call stdlib_ctrtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = czero + end do + ! compute current column of inv(a). + if( j CGETRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by CGETRF. + + pure subroutine stdlib_cgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CGGHRD: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the generalized + !> eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then CGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_cgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(sp) :: c + complex(sp) :: ctemp, s + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi CGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, + !> and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**H * (inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z' denotes the + !> conjugate transpose of matrix Z. + + pure subroutine stdlib_cggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'CGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'CGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'CUNMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p)*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda CGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**H + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of the matrix Z. + + pure subroutine stdlib_cggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'CGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'CGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'CUNMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p)*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda CGTTRS: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by CGTTRF. + + pure subroutine stdlib_cgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_cgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_cgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_cgttrs + + !> CHB2ST_KERNELS: is an internal routine used by the CHETRD_HB2ST + !> subroutine. + + pure subroutine stdlib_chb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, lda, & + v, tau, ldvt, work) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype, st, ed, sweep, n, nb, ib, lda, ldvt + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: v(*), tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j1, j2, lm, ln, vpos, taupos, dpos, ofdpos, ajeter + complex(sp) :: ctmp + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ajeter = ib + ldvt + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + dpos = 2 * nb + 1 + ofdpos = 2 * nb + else + dpos = 1 + ofdpos = 2 + endif + ! upper case + if( upper ) then + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = conjg( a( ofdpos-i, st+i ) ) + a( ofdpos-i, st+i ) = czero + end do + ctmp = conjg( a( ofdpos, st ) ) + call stdlib_clarfg( lm, ctmp, v( vpos+1 ), 1,tau( taupos ) ) + a( ofdpos, st ) = ctmp + lm = ed - st + 1 + call stdlib_clarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_clarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_clarfx( 'LEFT', ln, lm, v( vpos ),conjg( tau( taupos ) ),a( & + dpos-nb, j1 ), lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) =conjg( a( dpos-nb-i, j1+i ) ) + a( dpos-nb-i, j1+i ) = czero + end do + ctmp = conjg( a( dpos-nb, j1 ) ) + call stdlib_clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) ) + a( dpos-nb, j1 ) = ctmp + call stdlib_clarfx( 'RIGHT', ln-1, lm, v( vpos ),tau( taupos ),a( dpos-nb+& + 1, j1 ), lda-1, work) + endif + endif + ! lower case + else + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( ofdpos+i, st-1 ) + a( ofdpos+i, st-1 ) = czero + end do + call stdlib_clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,tau( taupos ) ) + + lm = ed - st + 1 + call stdlib_clarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_clarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_clarfx( 'RIGHT', lm, ln, v( vpos ),tau( taupos ), a( dpos+nb, & + st ),lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( dpos+nb+i, st ) + a( dpos+nb+i, st ) = czero + end do + call stdlib_clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,tau( taupos ) ) + + call stdlib_clarfx( 'LEFT', lm, ln-1, v( vpos ),conjg( tau( taupos ) ),a( & + dpos+nb-1, st+1 ), lda-1, work) + endif + endif + endif + return + end subroutine stdlib_chb2st_kernels + + !> CHEEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_cheequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(sp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,int,log,max,min,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'CHEEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_sp / s( j ) + end do + tol = one / sqrt( 2.0_sp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + real( s( i )*work( i ),KIND=sp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_classq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = real( ( n-2 ) * ( work( i ) - t*si ),KIND=sp) + c0 = real( -(t*si)*si + 2*work( i )*si - n*avg,KIND=sp) + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + real( ( u + work( i ) ) * d / n,KIND=sp) + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_slamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_slamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_cheequb + + !> CHEGS2: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. + !> B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. + + pure subroutine stdlib_chegs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(sp) :: akk, bkk + complex(sp) :: ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda CHEGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by CPOTRF. + + pure subroutine stdlib_chegst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_chegs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_ctrsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, & + n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda ) + call stdlib_chemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_cher2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( & + k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_chemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_ctrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**h) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_ctrsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-& + kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda ) + call stdlib_chemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_cher2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, & + k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_chemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_ctrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**h + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_ctrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, & + b, ldb, a( 1, k ), lda ) + call stdlib_chemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_cher2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_chemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_ctrmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, & + kb, cone, b( k, k ), ldb,a( 1, k ), lda ) + call stdlib_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**h*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_ctrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,& + b, ldb, a( k, 1 ), lda ) + call stdlib_chemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_cher2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1 ), & + lda, b( k, 1 ), ldb,one, a, lda ) + call stdlib_chemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_ctrmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,& + cone, b( k, k ), ldb,a( k, 1 ), lda ) + call stdlib_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_chegst + + !> CHETD2: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_chetd2( uplo, n, a, lda, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(sp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CHETRD: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_chetrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, iws, j, kk, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb CHETRD_HB2ST: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_chetrd_hb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, lhous, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: stage1, uplo, vect + integer(ilp), intent(in) :: n, kd, ldab, lhous, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: hous(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: rzero = 0.0e+0_sp + + + ! Local Scalars + logical(lk) :: lquery, wantq, upper, afters1 + integer(ilp) :: i, m, k, ib, sweepid, myid, shift, stt, st, ed, stind, edind, & + blklastind, colpt, thed, stepercol, grsiz, thgrsiz, thgrnb, thgrid, nbtiles, ttype, & + tid, nthreads, debug, abdpos, abofdpos, dpos, ofdpos, awpos, inda, indw, apos, sizea, & + lda, indv, indtau, sicev, sizetau, ldv, lhmin, lwmin + real(sp) :: abstmp + complex(sp) :: tmp + ! Intrinsic Functions + intrinsic :: min,max,ceiling,real + ! Executable Statements + ! determine the minimal workspace size required. + ! test the input parameters + debug = 0 + info = 0 + afters1 = stdlib_lsame( stage1, 'Y' ) + wantq = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) .or. ( lhous==-1 ) + ! determine the block size, the workspace size and the hous size. + ib = stdlib_ilaenv2stage( 2, 'CHETRD_HB2ST', vect, n, kd, -1, -1 ) + lhmin = stdlib_ilaenv2stage( 3, 'CHETRD_HB2ST', vect, n, kd, ib, -1 ) + lwmin = stdlib_ilaenv2stage( 4, 'CHETRD_HB2ST', vect, n, kd, ib, -1 ) + if( .not.afters1 .and. .not.stdlib_lsame( stage1, 'N' ) ) then + info = -1 + else if( .not.stdlib_lsame( vect, 'N' ) ) then + info = -2 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab<(kd+1) ) then + info = -7 + else if( lhoused ) exit + loop_120: do m = 1, stepercol + st = stt + loop_130: do sweepid = st, ed + loop_140: do k = 1, grsiz + myid = (i-sweepid)*(stepercol*grsiz)+ (m-1)*grsiz + k + if ( myid==1 ) then + ttype = 1 + else + ttype = mod( myid, 2 ) + 2 + endif + if( ttype==2 ) then + colpt = (myid/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + blklastind = colpt + else + colpt = ((myid+1)/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + if( ( stind>=edind-1 ).and.( edind==n ) ) then + blklastind = n + else + blklastind = 0 + endif + endif + ! call the kernel + !$ if( ttype/=1 ) then + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(in:WORK(MYID-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + !$ call stdlib_chb2st_kernels( uplo, wantq, ttype,stind, edind, & + !$ sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + !$ indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ else + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + call stdlib_chb2st_kernels( uplo, wantq, ttype,stind, edind, & + sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ endif + if ( blklastind>=(n-1) ) then + stt = stt + 1 + exit + endif + end do loop_140 + end do loop_130 + end do loop_120 + end do loop_110 + end do loop_100 + !$OMP END MASTER + !$OMP END PARALLEL + ! copy the diagonal from a to d. note that d is real thus only + ! the real part is needed, the imaginary part should be czero. + do i = 1, n + d( i ) = real( work( dpos+(i-1)*lda ),KIND=sp) + end do + ! copy the off diagonal from a to e. note that e is real thus only + ! the real part is needed, the imaginary part should be czero. + if( upper ) then + do i = 1, n-1 + e( i ) = real( work( ofdpos+i*lda ),KIND=sp) + end do + else + do i = 1, n-1 + e( i ) = real( work( ofdpos+(i-1)*lda ),KIND=sp) + end do + endif + hous( 1 ) = lhmin + work( 1 ) = lwmin + return + end subroutine stdlib_chetrd_hb2st + + !> CHETRD_HE2HB: reduces a complex Hermitian matrix A to complex Hermitian + !> band-diagonal form AB by a unitary similarity transformation: + !> Q**H * A * Q = AB. + + pure subroutine stdlib_chetrd_he2hb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: rone = 1.0e+0_sp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'CHETRD_HE2HB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda CHETRF: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_chetrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clahef( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_chetf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_clahef; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clahef( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_chetf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_chetrf + + !> CHETRF_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_chetrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clahef_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_chetf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clahef_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_chetf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_cswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_chetrf_rk + + !> CHETRF_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_chetrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_clahef_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_chetf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_clahef_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_clahef_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_chetf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_chetrf_rook + + !> CHETRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF. + + pure subroutine stdlib_chetrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(sp) :: s + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=sp) / real( a( k, k ),KIND=sp) + call stdlib_csscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CHETRS2: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF and converted by CSYCONV. + + pure subroutine stdlib_chetrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(sp) :: s + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_ctrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=sp) / real( a( i, i ),KIND=sp) + call stdlib_csscal( nrhs, s, b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_ctrsm('L','U','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_cswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ctrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=sp) / real( a( i, i ),KIND=sp) + call stdlib_csscal( nrhs, s, b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / conjg( akm1k ) + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / conjg( akm1k ) + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**h \ b) -> b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_ctrsm('L','L','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_csyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_chetrs2 + + !> CHETRS_AA: solves a system of linear equations A*X = B with a complex + !> hermitian matrix A using the factorization A = U**H*T*U or + !> A = L*T*L**H computed by CHETRF_AA. + + pure subroutine stdlib_chetrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + k = 1 + do while ( k<=n ) + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + end do + ! compute u**h \ b -> b [ (u**h \p**t * b) ] + call stdlib_ctrsm( 'L', 'U', 'C', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**h \p**t * b) ] + call stdlib_clacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_clacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1) + call stdlib_clacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1) + call stdlib_clacgv( n-1, work( 1 ), 1 ) + end if + call stdlib_cgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**h \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b(2, 1), & + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**h \p**t * b) )) ] + k = n + do while ( k>=1 ) + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k - 1 + end do + end if + else + ! solve a*x = b, where a = l*t*l**h. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + k = 1 + do while ( k<=n ) + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_ctrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1),lda, b(2, 1), & + ldb ) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_clacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_clacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_clacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1) + call stdlib_clacgv( n-1, work( 2*n ), 1 ) + end if + call stdlib_cgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with l**h + if( n>1 ) then + ! compute (l**h \ b) -> b [ l**h \ (t \ (l \p**t * b) ) ] + call stdlib_ctrsm( 'L', 'L', 'C', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb ) + ! pivot, p * b -> b [ p * (l**h \ (t \ (l \p**t * b) )) ] + k = n + do while ( k>=1 ) + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k - 1 + end do + end if + end if + return + end subroutine stdlib_chetrs_aa + + !> CHETRS_ROOK: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + + pure subroutine stdlib_chetrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(sp) :: s + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=sp) / real( a( k, k ),KIND=sp) + call stdlib_csscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k), then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_cgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k), then k+1 and -ipiv(k+1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_cswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CHPTRD: reduces a complex Hermitian matrix A stored in packed form to + !> real symmetric tridiagonal form T by a unitary similarity + !> transformation: Q**H * A * Q = T. + + pure subroutine stdlib_chptrd( uplo, n, ap, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, i1, i1i1, ii + complex(sp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CHPTRD', -info ) + return + end if + ! quick return if possible + if( n<=0 )return + if( upper ) then + ! reduce the upper triangle of a. + ! i1 is the index in ap of a(1,i+1). + i1 = n*( n-1 ) / 2 + 1 + ap( i1+n-1 ) = real( ap( i1+n-1 ),KIND=sp) + do i = n - 1, 1, -1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(1:i-1,i+1) + alpha = ap( i1+i-1 ) + call stdlib_clarfg( i, alpha, ap( i1 ), 1, taui ) + e( i ) = real( alpha,KIND=sp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(1:i,1:i) + ap( i1+i-1 ) = cone + ! compute y := tau * a * v storing y in tau(1:i) + call stdlib_chpmv( uplo, i, taui, ap, ap( i1 ), 1, czero, tau,1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_cdotc( i, tau, 1, ap( i1 ), 1 ) + call stdlib_caxpy( i, alpha, ap( i1 ), 1, tau, 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_chpr2( uplo, i, -cone, ap( i1 ), 1, tau, 1, ap ) + end if + ap( i1+i-1 ) = e( i ) + d( i+1 ) = real( ap( i1+i ),KIND=sp) + tau( i ) = taui + i1 = i1 - i + end do + d( 1 ) = real( ap( 1 ),KIND=sp) + else + ! reduce the lower triangle of a. ii is the index in ap of + ! a(i,i) and i1i1 is the index of a(i+1,i+1). + ii = 1 + ap( 1 ) = real( ap( 1 ),KIND=sp) + do i = 1, n - 1 + i1i1 = ii + n - i + 1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(i+2:n,i) + alpha = ap( ii+1 ) + call stdlib_clarfg( n-i, alpha, ap( ii+2 ), 1, taui ) + e( i ) = real( alpha,KIND=sp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(i+1:n,i+1:n) + ap( ii+1 ) = cone + ! compute y := tau * a * v storing y in tau(i:n-1) + call stdlib_chpmv( uplo, n-i, taui, ap( i1i1 ), ap( ii+1 ), 1,czero, tau( i ),& + 1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_cdotc( n-i, tau( i ), 1, ap( ii+1 ),1 ) + call stdlib_caxpy( n-i, alpha, ap( ii+1 ), 1, tau( i ), 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_chpr2( uplo, n-i, -cone, ap( ii+1 ), 1, tau( i ), 1,ap( i1i1 ) ) + + end if + ap( ii+1 ) = e( i ) + d( i ) = real( ap( ii ),KIND=sp) + tau( i ) = taui + ii = i1i1 + end do + d( n ) = real( ap( ii ),KIND=sp) + end if + return + end subroutine stdlib_chptrd + + !> CHPTRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A stored in packed format using the factorization + !> A = U*D*U**H or A = L*D*L**H computed by CHPTRF. + + pure subroutine stdlib_chptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + real(sp) :: s + complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_cgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=sp) / real( ap( kc+k-1 ),KIND=sp) + call stdlib_csscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_cswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_cgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_cgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc+k ),& + 1, cone, b( k+1, 1 ), ldb ) + call stdlib_clacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_cswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k CLA_GBRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + + real(sp) function stdlib_cla_gbrcond_c( trans, n, kl, ku, ab, ldab, afb,ldafb, ipiv, c, & + capply, info, work,rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, kl, ku, ldab, ldafb + integer(ilp) :: kd, ke + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ab(ldab,*), afb(ldafb,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, anorm, tmp + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_cla_gbrcond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab CLA_GERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + + real(sp) function stdlib_cla_gercond_c( trans, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, anorm, tmp + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_cla_gercond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CLA_HERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + + real(sp) function stdlib_cla_hercond_c( uplo, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, anorm, tmp + logical(lk) :: up, upper + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_cla_hercond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CLA_HERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(sp) function stdlib_cla_herpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(sp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper, lsame + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if (upper) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i,j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i,j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_csytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ) , work( k+1 ) ) + end do + work(k) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_cla_herpvgrw = rpvgrw + end function stdlib_cla_herpvgrw + + !> CLA_PORCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector + + real(sp) function stdlib_cla_porcond_c( uplo, n, a, lda, af, ldaf, c, capply,info, work, & + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(sp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_cla_porcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CLA_SYRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a REAL vector. + + real(sp) function stdlib_cla_syrcond_c( uplo, n, a, lda, af, ldaf, ipiv, c,capply, info, & + work, rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(in) :: c(*) + real(sp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(sp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_cla_syrcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(sp) function stdlib_cla_syrpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), af(ldaf,*) + real(sp), intent(out) :: work(*) + integer(ilp), intent(in) :: ipiv(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(sp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + complex(sp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_csytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_cla_syrpvgrw = rpvgrw + end function stdlib_cla_syrpvgrw + + !> CLABRD: reduces the first NB rows and columns of a complex general + !> m by n matrix A to upper or lower real bidiagonal form by a unitary + !> transformation Q**H * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by CGEBRD + + pure subroutine stdlib_clabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_clacgv( i-1, y( i, 1 ), ldy ) + call stdlib_cgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, a( i, 1 ),lda, y( i, 1 ), & + ldy, cone, a( i, i ), 1 ) + call stdlib_clacgv( i-1, y( i, 1 ), ldy ) + call stdlib_cgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, x( i, 1 ),ldx, a( 1, i ), & + 1, cone, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_clarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=sp) + if( i CLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense or banded + !> Hermitian matrix that has been reduced to tridiagonal form. + !> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) + !> where Z = Q**Hu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine SLAED4 (as called by SLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_claed7( n, cutpnt, qsiz, tlvls, curlvl, curpbm, d, q,ldq, rho, indxq, & + qstore, qptr, prmptr, perm,givptr, givcol, givnum, work, rwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(sp), intent(inout) :: d(*), givnum(2,*), qstore(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq, iw, iz, k, n1, n2, & + ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq<0 .or. icompq>1 ) then + ! info = -1 + ! else if( n<0 ) then + if( n<0 ) then + info = -1 + else if( min( 1, n )>cutpnt .or. n CLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue W of a complex upper Hessenberg + !> matrix H. + + pure subroutine stdlib_claein( rightv, noinit, n, h, ldh, w, v, b, ldb, rwork,eps3, smlnum, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(sp), intent(in) :: eps3, smlnum + complex(sp), intent(in) :: w + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: b(ldb,*) + complex(sp), intent(in) :: h(ldh,*) + complex(sp), intent(inout) :: v(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: tenth = 1.0e-1_sp + + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, ierr, its, j + real(sp) :: growto, nrmsml, rootn, rtemp, scale, vnorm + complex(sp) :: cdum, ei, ej, temp, x + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=sp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - w*i (except that the subdiagonal elements are not + ! stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - w + end do + if( noinit ) then + ! initialize v. + do i = 1, n + v( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_scnrm2( n, v, 1 ) + call stdlib_csscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), v, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing czero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( cabs1( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + rtemp = eps3 / ( rootn+one ) + v( 1 ) = eps3 + do i = 2, n + v( i ) = rtemp + end do + v( n-its+1 ) = v( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_icamax( n, v, 1 ) + call stdlib_csscal( n, one / cabs1( v( i ) ), v, 1 ) + return + end subroutine stdlib_claein + + !> CLAGS2: computes 2-by-2 unitary matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**H *A*Q = U**H *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**H*B*Q = V**H *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**H *A*Q = U**H *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**H *B*Q = V**H *( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> where + !> U = ( CSU SNU ), V = ( CSV SNV ), + !> ( -SNU**H CSU ) ( -SNV**H CSV ) + !> Q = ( CSQ SNQ ) + !> ( -SNQ**H CSQ ) + !> The rows of the transformed A and B are parallel. Moreover, if the + !> input 2-by-2 matrix A is not zero, then the transformed (1,1) entry + !> of A is not zero. If the input matrices A and B are both not zero, + !> then the transformed (2,2) element of B is not zero, except when the + !> first rows of input A and B are parallel and the second rows are + !> zero. + + pure subroutine stdlib_clags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(sp), intent(in) :: a1, a3, b1, b3 + real(sp), intent(out) :: csq, csu, csv + complex(sp), intent(in) :: a2, b2 + complex(sp), intent(out) :: snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(sp) :: a, aua11, aua12, aua21, aua22, avb11, avb12, avb21, avb22, csl, csr, d, fb,& + fc, s1, s2, snl, snr, ua11r, ua22r, vb11r, vb22r + complex(sp) :: b, c, d1, r, t, ua11, ua12, ua21, ua22, vb11, vb12, vb21, vb22 + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,real + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( t ) = abs( real( t,KIND=sp) ) + abs( aimag( t ) ) + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + fb = abs( b ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(1,d1). + d1 = one + if( fb/=zero )d1 = b / fb + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_slasv2( a, fb, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,2) element of |u|**h *|a| and |v|**h *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + d1*snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + d1*snr*b3 + aua12 = abs( csl )*abs1( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs1( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**h *a and v**h *b + if( ( abs( ua11r )+abs1( ua12 ) )==zero ) then + call stdlib_clartg( -cmplx( vb11r,KIND=sp), conjg( vb12 ), csq, snq,r ) + + else if( ( abs( vb11r )+abs1( vb12 ) )==zero ) then + call stdlib_clartg( -cmplx( ua11r,KIND=sp), conjg( ua12 ), csq, snq,r ) + + else if( aua12 / ( abs( ua11r )+abs1( ua12 ) )<=avb12 /( abs( vb11r )+abs1( vb12 & + ) ) ) then + call stdlib_clartg( -cmplx( ua11r,KIND=sp), conjg( ua12 ), csq, snq,r ) + + else + call stdlib_clartg( -cmplx( vb11r,KIND=sp), conjg( vb12 ), csq, snq,r ) + + end if + csu = csl + snu = -d1*snl + csv = csr + snv = -d1*snr + else + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,2) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -conjg( d1 )*snl*a1 + ua22 = -conjg( d1 )*snl*a2 + csl*a3 + vb21 = -conjg( d1 )*snr*b1 + vb22 = -conjg( d1 )*snr*b2 + csr*b3 + aua22 = abs( snl )*abs1( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs1( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua21 )+abs1( ua22 ) )==zero ) then + call stdlib_clartg( -conjg( vb21 ), conjg( vb22 ), csq, snq, r ) + else if( ( abs1( vb21 )+abs( vb22 ) )==zero ) then + call stdlib_clartg( -conjg( ua21 ), conjg( ua22 ), csq, snq, r ) + else if( aua22 / ( abs1( ua21 )+abs1( ua22 ) )<=avb22 /( abs1( vb21 )+abs1( vb22 & + ) ) ) then + call stdlib_clartg( -conjg( ua21 ), conjg( ua22 ), csq, snq, r ) + else + call stdlib_clartg( -conjg( vb21 ), conjg( vb22 ), csq, snq, r ) + end if + csu = snl + snu = d1*csl + csv = snr + snv = d1*csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + fc = abs( c ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(d1,1). + d1 = one + if( fc/=zero )d1 = c / fc + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_slasv2( a, fc, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,1) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -d1*snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -d1*snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs1( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs1( b2 ) + ! zero (2,1) elements of u**h *a and v**h *b. + if( ( abs1( ua21 )+abs( ua22r ) )==zero ) then + call stdlib_clartg( cmplx( vb22r,KIND=sp), vb21, csq, snq, r ) + else if( ( abs1( vb21 )+abs( vb22r ) )==zero ) then + call stdlib_clartg( cmplx( ua22r,KIND=sp), ua21, csq, snq, r ) + else if( aua21 / ( abs1( ua21 )+abs( ua22r ) )<=avb21 /( abs1( vb21 )+abs( vb22r & + ) ) ) then + call stdlib_clartg( cmplx( ua22r,KIND=sp), ua21, csq, snq, r ) + else + call stdlib_clartg( cmplx( vb22r,KIND=sp), vb21, csq, snq, r ) + end if + csu = csr + snu = -conjg( d1 )*snr + csv = csl + snv = -conjg( d1 )*snl + else + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,1) element of |u|**h *|a| and |v|**h *|b|. + ua11 = csr*a1 + conjg( d1 )*snr*a2 + ua12 = conjg( d1 )*snr*a3 + vb11 = csl*b1 + conjg( d1 )*snl*b2 + vb12 = conjg( d1 )*snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs1( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs1( b2 ) + ! zero (1,1) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua11 )+abs1( ua12 ) )==zero ) then + call stdlib_clartg( vb12, vb11, csq, snq, r ) + else if( ( abs1( vb11 )+abs1( vb12 ) )==zero ) then + call stdlib_clartg( ua12, ua11, csq, snq, r ) + else if( aua11 / ( abs1( ua11 )+abs1( ua12 ) )<=avb11 /( abs1( vb11 )+abs1( vb12 & + ) ) ) then + call stdlib_clartg( ua12, ua11, csq, snq, r ) + else + call stdlib_clartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = conjg( d1 )*csr + csv = snl + snv = conjg( d1 )*csl + end if + end if + return + end subroutine stdlib_clags2 + + !> CLAHQR: is an auxiliary routine called by CHSEQR to update the + !> eigenvalues and Schur decomposition already computed by CHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & + ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: w(*) + ! ========================================================= + ! Parameters + real(sp), parameter :: rzero = 0.0_sp + real(sp), parameter :: rone = 1.0_sp + real(sp), parameter :: dat1 = 3.0_sp/4.0_sp + integer(ilp), parameter :: kexsh = 10 + + + + + ! Local Scalars + complex(sp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y + real(sp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & + ulp + integer(ilp) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl + ! Local Arrays + complex(sp) :: v(2) + ! Statement Functions + real(sp) :: cabs1 + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + w( ilo ) = h( ilo, ilo ) + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = czero + h( j+3, j ) = czero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero + ! ==== ensure that subdiagonal entries are real ==== + if( wantt ) then + jlo = 1 + jhi = n + else + jlo = ilo + jhi = ihi + end if + do i = ilo + 1, ihi + if( aimag( h( i, i-1 ) )/=rzero ) then + ! ==== the following redundant normalization + ! . avoids problems with both gradual and + ! . sudden underflow in abs(h(i,i-1)) ==== + sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) + sc = conjg( sc ) / abs( sc ) + h( i, i-1 ) = abs( h( i, i-1 ) ) + call stdlib_cscal( jhi-i+1, sc, h( i, i ), ldh ) + call stdlib_cscal( min( jhi, i+1 )-jlo+1, conjg( sc ), h( jlo, i ),1 ) + if( wantz )call stdlib_cscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1 ) + end if + end do + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=sp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo, or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 30 continue + if( i=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=sp) ) + if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=sp) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some examples. ==== + if( abs( real( h( k, k-1 ),KIND=sp) )<=ulp*tst ) then + ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 + end if + end do + 50 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = czero + end if + ! exit from loop if a submatrix of order 1 has split off. + if( l>=i )go to 140 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( i, i-1 ),KIND=sp) ) + t = s + h( i, i ) + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( l+1, l ),KIND=sp) ) + t = s + h( l, l ) + else + ! wilkinson's shift. + t = h( i, i ) + u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) + s = cabs1( u ) + if( s/=rzero ) then + x = half*( h( i-1, i-1 )-t ) + sx = cabs1( x ) + s = max( s, cabs1( x ) ) + y = s*sqrt( ( x / s )**2+( u / s )**2 ) + if( sx>rzero ) then + if( real( x / sx,KIND=sp)*real( y,KIND=sp)+aimag( x / sx )*aimag( y )& + m )call stdlib_ccopy( 2, h( k, k-1 ), 1, v, 1 ) + call stdlib_clarfg( 2, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = czero + end if + v2 = v( 2 ) + t2 = real( t1*v2,KIND=sp) + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum + h( k+1, j ) = h( k+1, j ) - sum*v2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+2,i). + do j = i1, min( k+2, i ) + sum = t1*h( j, k ) + t2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum + h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = t1*z( j, k ) + t2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum + z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) + end do + end if + if( k==m .and. m>l ) then + ! if the qr step was started at row m > l because two + ! consecutive small subdiagonals were found, then extra + ! scaling must be performed to ensure that h(m,m-1) remains + ! real. + temp = cone - t1 + temp = temp / abs( temp ) + h( m+1, m ) = h( m+1, m )*conjg( temp ) + if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp + do j = m, i + if( j/=m+1 ) then + if( i2>j )call stdlib_cscal( i2-j, temp, h( j, j+1 ), ldh ) + call stdlib_cscal( j-i1, conjg( temp ), h( i1, j ), 1 ) + if( wantz ) then + call stdlib_cscal( nz, conjg( temp ), z( iloz, j ), 1 ) + end if + end if + end do + end if + end do loop_120 + ! ensure that h(i,i-1) is real. + temp = h( i, i-1 ) + if( aimag( temp )/=rzero ) then + rtemp = abs( temp ) + h( i, i-1 ) = rtemp + temp = temp / rtemp + if( i2>i )call stdlib_cscal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) + call stdlib_cscal( i-i1, temp, h( i1, i ), 1 ) + if( wantz ) then + call stdlib_cscal( nz, temp, z( iloz, i ), 1 ) + end if + end if + end do loop_130 + ! failure to converge in remaining number of iterations + info = i + return + 140 continue + ! h(i,i-1) is negligible: cone eigenvalue has converged. + w( i ) = h( i, i ) + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 30 + 150 continue + return + end subroutine stdlib_clahqr + + !> CLAHR2: reduces the first NB columns of A complex general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an unitary similarity transformation + !> Q**H * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*v**H, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by CGEHRD. + + pure subroutine stdlib_clahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(sp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**h + call stdlib_clacgv( i-1, a( k+i-1, 1 ), lda ) + call stdlib_cgemv( 'NO TRANSPOSE', n-k, i-1, -cone, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, cone, a( k+1, i ), 1 ) + call stdlib_clacgv( i-1, a( k+i-1, 1 ), lda ) + ! apply i - v * t**h * v**h to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**h * b1 + call stdlib_ccopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_ctrmv( 'LOWER', 'CONJUGATE TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, & + t( 1, nb ), 1 ) + ! w := w + v2**h * b2 + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ),lda, a( & + k+i, i ), 1, cone, t( 1, nb ), 1 ) + ! w := t**h * w + call stdlib_ctrmv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, & + nb ), 1 ) + ! b2 := b2 - v2*w + call stdlib_cgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -cone,a( k+i, 1 ),lda, t( 1, nb & + ), 1, cone, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_ctrmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_caxpy( i-1, -cone, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_clarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = cone + ! compute y(k+1:n,i) + call stdlib_cgemv( 'NO TRANSPOSE', n-k, n-k-i+1,cone, a( k+1, i+1 ),lda, a( k+i, i )& + , 1, czero, y( k+1, i ), 1 ) + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ), lda,a( k+& + i, i ), 1, czero, t( 1, i ), 1 ) + call stdlib_cgemv( 'NO TRANSPOSE', n-k, i-1, -cone,y( k+1, 1 ), ldy,t( 1, i ), 1, & + cone, y( k+1, i ), 1 ) + call stdlib_cscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_cscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_ctrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_clacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,cone, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, cone,a( 1,& + 2+nb ), lda, a( k+1+nb, 1 ), lda, cone, y,ldy ) + call stdlib_ctrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,cone, t, ldt, y, & + ldy ) + return + end subroutine stdlib_clahr2 + + !> CLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_clals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(sp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(sp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jcol, jrow, m, n, nlp1 + real(sp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: aimag,cmplx,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb CLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, CLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by CLALSA. + + pure subroutine stdlib_clalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, rwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, jcol, jimag, jreal, jrow, lf, ll, lvl, lvl2, & + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Intrinsic Functions + intrinsic :: aimag,cmplx,real + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n CLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_clalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, rwork, & + iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + irwb, irwib, irwrb, irwu, irwvt, irwwrk, iwk, j, jcol, jimag, jreal, jrow, k, nlvl, & + nm1, nrwork, nsize, nsub, perm, poles, s, sizei, smlszp, sqre, st, st1, u, vt, & + z + real(sp) :: cs, eps, orgnrm, r, rcnd, sn, tol + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,int,log,real,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_claset( 'A', 1, nrhs, czero, czero, b, ldb ) + else + rank = 1 + call stdlib_clascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_csrot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + rwork( i*2-1 ) = cs + rwork( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = rwork( j*2-1 ) + sn = rwork( j*2 ) + call stdlib_csrot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_slanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_claset( 'A', n, nrhs, czero, czero, b, ldb ) + return + end if + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + irwu = 1 + irwvt = irwu + n*n + irwwrk = irwvt + n*n + irwrb = irwwrk + irwib = irwrb + n*nrhs + irwb = irwib + n*nrhs + call stdlib_slaset( 'A', n, n, zero, one, rwork( irwu ), n ) + call stdlib_slaset( 'A', n, n, zero, one, rwork( irwvt ), n ) + call stdlib_slasdq( 'U', 0, n, n, n, 0, d, e, rwork( irwvt ), n,rwork( irwu ), n, & + rwork( irwwrk ), 1,rwork( irwwrk ), info ) + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_slasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=sp) + end do + end do + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ), rwork( jimag ),KIND=sp) + end do + end do + tol = rcnd*abs( d( stdlib_isamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_claset( 'A', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + else + call stdlib_clascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + ! since b is complex, the following call to stdlib_sgemm is performed + ! in two steps (real and imaginary parts). that is for v * b + ! (in the real version of the code v**h is stored in work). + ! call stdlib_sgemm( 't', 'n', n, nrhs, n, one, work, n, b, ldb, zero, + ! $ work( nwork ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=sp) + end do + end do + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ), rwork( jimag ),KIND=sp) + end do + end do + ! unscale. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_slasrt( 'D', n, d, info ) + call stdlib_clascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=sp) / real( smlsiz+1,KIND=sp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + nrwork = givnum + 2*nlvl*n + bx = 1 + irwrb = nrwork + irwib = irwrb + smlsiz*nrhs + irwb = irwib + smlsiz*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_ccopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_ccopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_slasdq. + call stdlib_slaset( 'A', nsize, nsize, zero, one,rwork( vt+st1 ), n ) + call stdlib_slaset( 'A', nsize, nsize, zero, one,rwork( u+st1 ), n ) + call stdlib_slasdq( 'U', 0, nsize, nsize, nsize, 0, d( st ),e( st ), rwork( & + vt+st1 ), n, rwork( u+st1 ),n, rwork( nrwork ), 1, rwork( nrwork ),info ) + + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_slasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=sp) + end do + end do + call stdlib_sgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwrb ), nsize ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_sgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=sp) + end do + end do + call stdlib_clacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_slasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), rwork( u+& + st1 ), n, rwork( vt+st1 ),iwork( k+st1 ), rwork( difl+st1 ),rwork( difr+st1 ),& + rwork( z+st1 ),rwork( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), & + n, iwork( perm+st1 ),rwork( givnum+st1 ), rwork( c+st1 ),rwork( s+st1 ), & + rwork( nrwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_clalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), & + rwork( difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), & + iwork( givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ),& + rwork( s+st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_240 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_isamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_claset( 'A', 1, nrhs, czero, czero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_clascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + loop_320: do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_ccopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + ! since b and bx are complex, the following call to stdlib_sgemm + ! is performed in two steps (real and imaginary parts). + ! call stdlib_sgemm( 't', 'n', nsize, nrhs, nsize, one, + ! $ rwork( vt+st1 ), n, rwork( bxst ), n, zero, + ! $ b( st, 1 ), ldb ) + j = bxst - n - 1 + jreal = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jreal = jreal + 1 + rwork( jreal ) = real( work( j+jrow ),KIND=sp) + end do + end do + call stdlib_sgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwrb ), nsize ) + j = bxst - n - 1 + jimag = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jimag = jimag + 1 + rwork( jimag ) = aimag( work( j+jrow ) ) + end do + end do + call stdlib_sgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=sp) + end do + end do + else + call stdlib_clalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), rwork( & + difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ), rwork( s+& + st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do loop_320 + ! unscale and sort the singular values. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_slasrt( 'D', n, d, info ) + call stdlib_clascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_clalsd + + !> CLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(sp) function stdlib_clangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(sp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value CLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex matrix A. + + real(sp) function stdlib_clange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value CLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex tridiagonal matrix A. + + pure real(sp) function stdlib_clangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_classq( n-1, dl, 1, scale, sum ) + call stdlib_classq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_clangt = anorm + return + end function stdlib_clangt + + !> CLANHB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n hermitian band matrix A, with k super-diagonals. + + real(sp) function stdlib_clanhb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + sum = abs( real( ab( k+1, j ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( ab( 1, j ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( ab( k+1, j ),KIND=sp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ab( 1, j ),KIND=sp) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_classq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + do j = 1, n + if( real( ab( l, j ),KIND=sp)/=zero ) then + absa = abs( real( ab( l, j ),KIND=sp) ) + if( scale CLANHE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A. + + real(sp) function stdlib_clanhe( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j - 1 + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + sum = abs( real( a( j, j ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( a( j, j ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + do i = j + 1, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( a( j, j ),KIND=sp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( a( j, j ),KIND=sp) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_classq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + do i = 1, n + if( real( a( i, i ),KIND=sp)/=zero ) then + absa = abs( real( a( i, i ),KIND=sp) ) + if( scale CLANHF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian matrix A in RFP format. + + real(sp) function stdlib_clanhf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: work(0:*) + complex(sp), intent(in) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(sp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_clanhf = zero + return + else if( n==1 ) then + stdlib_clanhf = abs(real(a(0),KIND=sp)) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='c' or 'c' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'C' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(0,0) + temp = abs( real( a( j+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j - 1 + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = k + j - 1 + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = i + 1 + ! =k+j; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = k + j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n + if( ilu==1 ) then + ! uplo ='l' + do j = 0, k - 2 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j + 1 + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + j = k - 1 + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do j = k, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + j = k - 1 + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + do j = k, n - 1 + do i = 0, j - k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j - k + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j - k + 1 + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j - k + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) + temp = abs( real( a( j+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + temp = abs( real( a( j+1+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j + 1 + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = k + j + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = i + 1 + ! =k+j+1; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = k + j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = n + ! -> u(k-1,k-1) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n+1 + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) at a(0,0) + temp = abs( real( a( j+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j - 1 + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + j = k + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do j = k + 1, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + j = k + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + do j = k + 1, n - 1 + do i = 0, j - k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = j - k - 1 + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + i = j - k + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + do i = j - k + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + j = n + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + i = k - 1 + ! u(k,k) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=sp) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end if + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + if( ifm==1 ) then + ! a is 'n' + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( real( a( 0+j*lda ),KIND=sp) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=sp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( real( a( 0+j*lda ),KIND=sp) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( real( a( 0 ),KIND=sp) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=sp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=sp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_classq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_classq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + l = k - 1 + ! -> u(k,k) at a(k-1,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=sp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,1) + do i = 1, k - 1 + aa = real( a( l ),KIND=sp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(0,k-1) + aa = real( a( l ),KIND=sp) + ! u(k-1,k-1) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k) + do j = k, n - 1 + aa = real( a( l ),KIND=sp) + ! -> u(j-k,j-k) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale l(0,0) at a(0,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=sp) + ! l(i,i) + if( aa/=zero ) then + if( scale k-1 + (k-1)*lda or l(k-1,k-1) at a(k-1,k-1) + aa = real( a( l ),KIND=sp) + ! l(k-1,k-1) at a(k-1,k-1) + if( aa/=zero ) then + if( scale u(k,k) at a(k,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=sp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=sp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k,k) at a(0,k) + aa = real( a( l ),KIND=sp) + ! u(k,k) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k+1) + do j = k + 1, n - 1 + aa = real( a( l ),KIND=sp) + ! -> u(j-k-1,j-k-1) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(k-1,n) + aa = real( a( l ),KIND=sp) + ! u(k,k) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + aa = real( a( l ),KIND=sp) + ! l(k,k) at a(0,0) + if( aa/=zero ) then + if( scale l(0,0) at a(0,1) + do i = 0, k - 2 + aa = real( a( l ),KIND=sp) + ! l(i,i) + if( aa/=zero ) then + if( scale k - 1 + k*lda or l(k-1,k-1) at a(k-1,k) + aa = real( a( l ),KIND=sp) + ! l(k-1,k-1) at a(k-1,k) + if( aa/=zero ) then + if( scale CLANHP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A, supplied in packed form. + + real(sp) function stdlib_clanhp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 0 + do j = 1, n + do i = k + 1, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + sum = abs( real( ap( k ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + k = 1 + do j = 1, n + sum = abs( real( ap( k ),KIND=sp) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( real( ap( k ),KIND=sp) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ap( k ),KIND=sp) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_classq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=sp)/=zero ) then + absa = abs( real( ap( k ),KIND=sp) ) + if( scale CLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(sp) function stdlib_clanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_classq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_clanhs = value + return + end function stdlib_clanhs + + !> CLANHT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian tridiagonal matrix A. + + pure real(sp) function stdlib_clanht( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: d(*) + complex(sp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_classq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_slassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_clanht = anorm + return + end function stdlib_clanht + + !> CLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(sp) function stdlib_clansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_classq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_classq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_clansb = value + return + end function stdlib_clansb + + !> CLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A, supplied in packed form. + + real(sp) function stdlib_clansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,aimag,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_classq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=sp)/=zero ) then + absa = abs( real( ap( k ),KIND=sp) ) + if( scale CLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A. + + real(sp) function stdlib_clansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_classq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_classq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_classq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_clansy = value + return + end function stdlib_clansy + + !> CLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(sp) function stdlib_clantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_classq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_classq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_classq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_classq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_clantb = value + return + end function stdlib_clantb + + !> CLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(sp) function stdlib_clantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_classq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_classq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_classq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_classq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_clantp = value + return + end function stdlib_clantp + + !> CLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(sp) function stdlib_clantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(out) :: work(*) + complex(sp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_classq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_classq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_classq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_classq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_clantr = value + return + end function stdlib_clantr + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_clapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(sp), intent(out) :: ssmin + ! Array Arguments + complex(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + real(sp) :: ssmax + complex(sp) :: a11, a12, a22, c, tau + ! Intrinsic Functions + intrinsic :: abs,conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_clarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = cone + c = -conjg( tau )*stdlib_cdotc( n, x, incx, y, incy ) + call stdlib_caxpy( n, c, x, incx, y, incy ) + call stdlib_clarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_slas2( abs( a11 ), abs( a12 ), abs( a22 ), ssmin, ssmax ) + return + end subroutine stdlib_clapll + + !> CLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_claqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: vn1(*), vn2(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(sp) :: temp, temp2, tol3z + complex(sp) :: aii + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_slamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_isamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_cswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi CLAQPS: computes a step of QR factorization with column pivoting + !> of a complex M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_claqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: vn1(*), vn2(*) + complex(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*) + complex(sp), intent(out) :: tau(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(sp) :: temp, temp2, tol3z + complex(sp) :: akk + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,nint,real,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_slamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + call stdlib_cgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1 ),lda, f( k, 1 ),& + ldf, cone, a( rk, k ), 1 ) + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_cgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1 ), lda,& + a( rk, k ), 1, czero,auxv( 1 ), 1 ) + call stdlib_cgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1, 1 ), ldf,auxv( 1 ), 1, & + cone, f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_scnrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_snrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_dlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 60 + end if + return + end subroutine stdlib_claqps + + !> CLAQR5: called by CLAQR0 performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_claqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & + ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), s(*), z(ldz,*) + complex(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(sp), parameter :: rzero = 0.0_sp + real(sp), parameter :: rone = 1.0_sp + + + ! Local Scalars + complex(sp) :: alpha, beta, cdum, refsum + real(sp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp + integer(ilp) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& + m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,mod,real + ! Local Arrays + complex(sp) :: vt(3) + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by cone. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = czero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_claset( 'ALL', kdu, kdu, czero, cone, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop) then + if( h( k+1, k )/=czero ) then + tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) + if( tst1==rzero ) then + if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & + k+1, k ) = czero + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_claqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),s( 2*m ), v( 1, m )& + ) + alpha = v( 1, m ) + call stdlib_clarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually czero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*conjg( v( 2, m ) ) + h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3, m ) ) + ! ==== calculate reflection to move + ! . mth bulge cone step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_clarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & + ) then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new cone. ==== + call stdlib_claqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),s( 2*m ), vt ) + + alpha = vt( 1 ) + call stdlib_clarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = conjg( vt( 1 ) )*( h( k+1, k )+conjg( vt( 2 ) )*h( k+2, k ) ) + + if( cabs1( h( k+2, k )-refsum*vt( 2 ) )+cabs1( refsum*vt( 3 ) )>ulp*( & + cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & + then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old cone with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new cone. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = czero + h( k+3, k ) = czero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2, m ) ) + h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = conjg( v( 1, m ) )*( h( k+1, k+1 )+conjg( v( 2, m ) )*h( k+2, k+1 )+& + conjg( v( 3, m ) )*h( k+3, k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is czero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& + k ) = czero + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = conjg( v( 1, m ) )*( h( k+1, j )+conjg( v( 2, m ) )*h( k+2, j )+& + conjg( v( 3, m ) )*h( k+3, j ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m ) ) + u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m ) ) + z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_cgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& + jcol ), ldh, czero, wh,ldwh ) + call stdlib_clacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_cgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_clacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_cgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & + u( k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_clacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_claqr5 + + !> CLAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_claqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + + ! local variables + real(sp) :: c + complex(sp) :: s, temp + if( k+1 == ihi ) then + ! shift is located on the edge of the matrix, remove it + call stdlib_clartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = czero + call stdlib_crot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c, s ) + + call stdlib_crot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c, s ) + + if ( ilz ) then + call stdlib_crot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c, s ) + + end if + else + ! normal operation, move bulge down + ! apply transformation from the right + call stdlib_clartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) + b( k+1, k+1 ) = temp + b( k+1, k ) = czero + call stdlib_crot( k+2-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c, s ) + + call stdlib_crot( k-istartm+1, b( istartm, k+1 ), 1, b( istartm, k ),1, c, s ) + + if ( ilz ) then + call stdlib_crot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c, s ) + + end if + ! apply transformation from the left + call stdlib_clartg( a( k+1, k ), a( k+2, k ), c, s, temp ) + a( k+1, k ) = temp + a( k+2, k ) = czero + call stdlib_crot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) + call stdlib_crot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) + if ( ilq ) then + call stdlib_crot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c, conjg(& + s ) ) + end if + end if + end subroutine stdlib_claqz1 + + !> CLAQZ3: Executes a single multishift QZ sweep + + pure subroutine stdlib_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,& + beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_desired, ldqc, ldzc + complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( & + ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) + integer(ilp), intent( out ) :: info + + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(sp) :: safmin, safmax, c, scale + complex(sp) :: temp, temp2, temp3, s + info = 0 + if ( nblock_desired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_desired + return + else if ( lwork < n*nblock_desired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLAQZ3', -info ) + return + end if + ! executable statements + ! get machine constants + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_slabad( safmin, safmax ) + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ns = nshifts + npos = max( nblock_desired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_claset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) + call stdlib_claset( 'FULL', ns, ns, czero, cone, zc, ldzc ) + do i = 1, ns + ! introduce the shift + scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) + if( scale >= safmin .and. scale <= safmax ) then + alpha( i ) = alpha( i )/scale + beta( i ) = beta( i )/scale + end if + temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) + temp3 = beta( i )*a( ilo+1, ilo ) + if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then + temp2 = cone + temp3 = czero + end if + call stdlib_clartg( temp2, temp3, c, s, temp ) + call stdlib_crot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) + call stdlib_crot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) + call stdlib_crot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c, conjg( s ) ) + ! chase the shift down + do j = 1, ns-i + call stdlib_claqz1( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& + ns ), lda, czero, work, sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& + ns ), ldb, czero, work, sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_cgemm( 'N', 'N', n, sheight, sheight, cone, q( 1, ilo ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_clacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_cgemm( 'N', 'N', n, swidth, swidth, cone, z( 1, ilo ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_clacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_claset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) + call stdlib_claset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -1 + do j = 0, np-1 + ! move down the block with index k+i+j, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_claqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& + ns+np ), lda, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& + ns+np ), ldb, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_cgemm( 'N', 'N', n, nblock, nblock, cone, q( 1, k+1 ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_clacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_cgemm( 'N', 'N', n, nblock, nblock, cone, z( 1, k ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_clacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_claset( 'FULL', ns, ns, czero, cone, qc, ldqc ) + call stdlib_claset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns + ! chase the shift down to the bottom right corner + do ishift = ihi-i, ihi-1 + call stdlib_claqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, czero, work, sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, czero, work, sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_cgemm( 'N', 'N', n, ns, ns, cone, q( 1, ihi-ns+1 ), ldq,qc, ldqc, czero,& + work, n ) + call stdlib_clacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & + lda, zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & + ldb, zc, ldzc, czero, work,sheight ) + call stdlib_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_cgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1, ihi-ns ), ldz,zc, ldzc, & + czero, work, n ) + call stdlib_clacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_claqz3 + + !> CLARGV: generates a vector of complex plane rotations with real + !> cosines, determined by elements of the complex vectors x and y. + !> For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) + !> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) + !> where c(i)**2 + ABS(s(i))**2 = 1 + !> The following conventions are used (these are the same as in CLARTG, + !> but differ from the BLAS1 routine CROTG): + !> If y(i)=0, then c(i)=1 and s(i)=0. + !> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. + + pure subroutine stdlib_clargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(sp), intent(out) :: c(*) + complex(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i, ic, ix, iy, j + real(sp) :: cs, d, di, dr, eps, f2, f2s, g2, g2s, safmin, safmn2, safmx2, scale + complex(sp) :: f, ff, fs, g, gs, r, sn + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,int,log,max,real,sqrt + ! Statement Functions + real(sp) :: abs1, abssq + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Statement Function Definitions + abs1( ff ) = max( abs( real( ff,KIND=sp) ), abs( aimag( ff ) ) ) + abssq( ff ) = real( ff,KIND=sp)**2 + aimag( ff )**2 + ! Executable Statements + ! if( first ) then + ! first = .false. + safmin = stdlib_slamch( 'S' ) + eps = stdlib_slamch( 'E' ) + safmn2 = stdlib_slamch( 'B' )**int( log( safmin / eps ) /log( stdlib_slamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! end if + ix = 1 + iy = 1 + ic = 1 + loop_60: do i = 1, n + f = x( ix ) + g = y( iy ) + ! use identical algorithm as in stdlib_clartg + scale = max( abs1( f ), abs1( g ) ) + fs = f + gs = g + count = 0 + if( scale>=safmx2 ) then + 10 continue + count = count + 1 + fs = fs*safmn2 + gs = gs*safmn2 + scale = scale*safmn2 + if( scale>=safmx2 .and. count < 20 )go to 10 + else if( scale<=safmn2 ) then + if( g==czero ) then + cs = one + sn = czero + r = f + go to 50 + end if + 20 continue + count = count - 1 + fs = fs*safmx2 + gs = gs*safmx2 + scale = scale*safmx2 + if( scale<=safmn2 )go to 20 + end if + f2 = abssq( fs ) + g2 = abssq( gs ) + if( f2<=max( g2, one )*safmin ) then + ! this is a rare case: f is very small. + if( f==czero ) then + cs = zero + r = stdlib_slapy2( real( g,KIND=sp), aimag( g ) ) + ! do complex/real division explicitly with two real + ! divisions + d = stdlib_slapy2( real( gs,KIND=sp), aimag( gs ) ) + sn = cmplx( real( gs,KIND=sp) / d, -aimag( gs ) / d,KIND=sp) + go to 50 + end if + f2s = stdlib_slapy2( real( fs,KIND=sp), aimag( fs ) ) + ! g2 and g2s are accurate + ! g2 is at least safmin, and g2s is at least safmn2 + g2s = sqrt( g2 ) + ! error in cs from underflow in f2s is at most + ! unfl / safmn2 .lt. sqrt(unfl*eps) .lt. eps + ! if max(g2,one)=g2, then f2 .lt. g2*safmin, + ! and so cs .lt. sqrt(safmin) + ! if max(g2,one)=one, then f2 .lt. safmin + ! and so cs .lt. sqrt(safmin)/safmn2 = sqrt(eps) + ! therefore, cs = f2s/g2s / sqrt( 1 + (f2s/g2s)**2 ) = f2s/g2s + cs = f2s / g2s + ! make sure abs(ff) = 1 + ! do complex/real division explicitly with 2 real divisions + if( abs1( f )>one ) then + d = stdlib_slapy2( real( f,KIND=sp), aimag( f ) ) + ff = cmplx( real( f,KIND=sp) / d, aimag( f ) / d,KIND=sp) + else + dr = safmx2*real( f,KIND=sp) + di = safmx2*aimag( f ) + d = stdlib_slapy2( dr, di ) + ff = cmplx( dr / d, di / d,KIND=sp) + end if + sn = ff*cmplx( real( gs,KIND=sp) / g2s, -aimag( gs ) / g2s,KIND=sp) + r = cs*f + sn*g + else + ! this is the most common case. + ! neither f2 nor f2/g2 are less than safmin + ! f2s cannot overflow, and it is accurate + f2s = sqrt( one+g2 / f2 ) + ! do the f2s(real)*fs(complex) multiply with two real + ! multiplies + r = cmplx( f2s*real( fs,KIND=sp), f2s*aimag( fs ),KIND=sp) + cs = one / f2s + d = f2 + g2 + ! do complex/real division explicitly with two real divisions + sn = cmplx( real( r,KIND=sp) / d, aimag( r ) / d,KIND=sp) + sn = sn*conjg( gs ) + if( count/=0 ) then + if( count>0 ) then + do j = 1, count + r = r*safmx2 + end do + else + do j = 1, -count + r = r*safmn2 + end do + end if + end if + end if + 50 continue + c( ic ) = cs + y( iy ) = sn + x( ix ) = r + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_60 + return + end subroutine stdlib_clargv + + !> CLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by SLARRE. + + pure subroutine stdlib_clarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: minrgp, pivmin, vl, vu + real(sp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(sp), intent(in) :: gers(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + integer(ilp) :: indin1, indin2 + real(sp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + intrinsic :: cmplx + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indin1 = 3*n + 1 + indin2 = 4*n + 1 + indwrk = 5*n + 1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = cmplx( one, zero,KIND=sp) + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_scopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + do k = 1, in - 1 + d( ibegin+k-1 ) = real( z( ibegin+k-1,j ),KIND=sp) + l( ibegin+k-1 ) = real( z( ibegin+k-1,j+1 ),KIND=sp) + end do + d( iend ) = real( z( iend, j ),KIND=sp) + sigma = real( z( iend, j+1 ),KIND=sp) + ! set the corresponding entries in z to zero + call stdlib_claset( 'FULL', in, 2, czero, czero,z( ibegin, j), ldz ) + + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_slarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_slarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_slarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_slarrf needs lwork = 2*n + call stdlib_slarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,work( indin1 ), work( indin2 ),work( indwrk ), iinfo ) + + ! in the complex case, stdlib_slarrf cannot write + ! the new rrr directly into z and needs an intermediate + ! workspace + do k = 1, in-1 + z( ibegin+k-1, newftt ) =cmplx( work( indin1+k-1 ), zero,KIND=sp) + + z( ibegin+k-1, newftt+1 ) =cmplx( work( indin2+k-1 ), zero,KIND=sp) + + end do + z( iend, newftt ) =cmplx( work( indin1+in-1 ), zero,KIND=sp) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_slarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = cmplx( ssigma, zero,KIND=sp) + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=sp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_slarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_clar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_csscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex CLATDF: computes the contribution to the reciprocal Dif-estimate + !> by solving for x in Z * x = b, where b is chosen such that the norm + !> of x is as large as possible. It is assumed that LU decomposition + !> of Z has been computed by CGETC2. On entry RHS = f holds the + !> contribution from earlier solved sub-systems, and on return RHS = x. + !> The factorization of Z returned by CGETC2 has the form + !> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower + !> triangular with unit diagonal elements and U is upper triangular. + + pure subroutine stdlib_clatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, ldz, n + real(sp), intent(inout) :: rdscal, rdsum + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(sp), intent(inout) :: rhs(*), z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxdim = 2 + + + + ! Local Scalars + integer(ilp) :: i, info, j, k + real(sp) :: rtemp, scale, sminu, splus + complex(sp) :: bm, bp, pmone, temp + ! Local Arrays + real(sp) :: rwork(maxdim) + complex(sp) :: work(4*maxdim), xm(maxdim), xp(maxdim) + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( ijob/=2 ) then + ! apply permutations ipiv to rhs + call stdlib_claswp( 1, rhs, ldz, 1, n-1, ipiv, 1 ) + ! solve for l-part choosing rhs either to +1 or -1. + pmone = -cone + loop_10: do j = 1, n - 1 + bp = rhs( j ) + cone + bm = rhs( j ) - cone + splus = one + ! lockahead for l- part rhs(1:n-1) = +-1 + ! splus and smin computed more efficiently than in bsolve[1]. + splus = splus + real( stdlib_cdotc( n-j, z( j+1, j ), 1, z( j+1,j ), 1 ),KIND=sp) + + sminu = real( stdlib_cdotc( n-j, z( j+1, j ), 1, rhs( j+1 ), 1 ),KIND=sp) + splus = splus*real( rhs( j ),KIND=sp) + if( splus>sminu ) then + rhs( j ) = bp + else if( sminu>splus ) then + rhs( j ) = bm + else + ! in this case the updating sums are equal and we can + ! choose rhs(j) +1 or -1. the first time this happens we + ! choose -1, thereafter +1. this is a simple way to get + ! good estimates of matrices like byers well-known example + ! (see [1]). (not done in bsolve.) + rhs( j ) = rhs( j ) + pmone + pmone = cone + end if + ! compute the remaining r.h.s. + temp = -rhs( j ) + call stdlib_caxpy( n-j, temp, z( j+1, j ), 1, rhs( j+1 ), 1 ) + end do loop_10 + ! solve for u- part, lockahead for rhs(n) = +-1. this is not done + ! in bsolve and will hopefully give us a better estimate because + ! any ill-conditioning of the original matrix is transferred to u + ! and not to l. u(n, n) is an approximation to sigma_min(lu). + call stdlib_ccopy( n-1, rhs, 1, work, 1 ) + work( n ) = rhs( n ) + cone + rhs( n ) = rhs( n ) - cone + splus = zero + sminu = zero + do i = n, 1, -1 + temp = cone / z( i, i ) + work( i ) = work( i )*temp + rhs( i ) = rhs( i )*temp + do k = i + 1, n + work( i ) = work( i ) - work( k )*( z( i, k )*temp ) + rhs( i ) = rhs( i ) - rhs( k )*( z( i, k )*temp ) + end do + splus = splus + abs( work( i ) ) + sminu = sminu + abs( rhs( i ) ) + end do + if( splus>sminu )call stdlib_ccopy( n, work, 1, rhs, 1 ) + ! apply the permutations jpiv to the computed solution (rhs) + call stdlib_claswp( 1, rhs, ldz, 1, n-1, jpiv, -1 ) + ! compute the sum of squares + call stdlib_classq( n, rhs, 1, rdscal, rdsum ) + return + end if + ! entry ijob = 2 + ! compute approximate nullvector xm of z + call stdlib_cgecon( 'I', n, z, ldz, one, rtemp, work, rwork, info ) + call stdlib_ccopy( n, work( n+1 ), 1, xm, 1 ) + ! compute rhs + call stdlib_claswp( 1, xm, ldz, 1, n-1, ipiv, -1 ) + temp = cone / sqrt( stdlib_cdotc( n, xm, 1, xm, 1 ) ) + call stdlib_cscal( n, temp, xm, 1 ) + call stdlib_ccopy( n, xm, 1, xp, 1 ) + call stdlib_caxpy( n, cone, rhs, 1, xp, 1 ) + call stdlib_caxpy( n, -cone, xm, 1, rhs, 1 ) + call stdlib_cgesc2( n, z, ldz, rhs, ipiv, jpiv, scale ) + call stdlib_cgesc2( n, z, ldz, xp, ipiv, jpiv, scale ) + if( stdlib_scasum( n, xp, 1 )>stdlib_scasum( n, rhs, 1 ) )call stdlib_ccopy( n, xp, 1, & + rhs, 1 ) + ! compute the sum of squares + call stdlib_classq( n, rhs, 1, rdscal, rdsum ) + return + end subroutine stdlib_clatdf + + !> CLAUNHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine CLAUNHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_claunhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_claunhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_claunhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_ctrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_claunhr_col_getrfnp + + !> CPBCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite band matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> CPBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(sp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab CPBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and banded, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_cpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, l, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_cpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cpbrfs + + !> CPBTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_cpbtrf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 32 + integer(ilp), parameter :: ldwork = nbmax+1 + + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ib, ii, j, jj, nb + ! Local Arrays + complex(sp) :: work(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( .not.stdlib_lsame( uplo, 'U' ) ) .and.( .not.stdlib_lsame( uplo, 'L' ) ) ) & + then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldabkd ) then + ! use unblocked code + call stdlib_cpbtf2( uplo, n, kd, ab, ldab, info ) + else + ! use blocked code + if( stdlib_lsame( uplo, 'U' ) ) then + ! compute the cholesky factorization of a hermitian band + ! matrix, given the upper triangle of the matrix in band + ! storage. + ! zero the upper triangle of the work array. + do j = 1, nb + do i = 1, j - 1 + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_70: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_cpotf2( uplo, ib, ab( kd+1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 a12 a13 + ! a22 a23 + ! a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a12, a22 and + ! a23 are empty if ib = kd. the upper triangle of a13 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a12 + call stdlib_ctrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i2, cone,ab( kd+1, i ), ldab-1,ab( kd+1-ib, i+ib ), ldab-1 ) + ! update a22 + call stdlib_cherk( 'UPPER', 'CONJUGATE TRANSPOSE', i2, ib,-one, ab( kd+& + 1-ib, i+ib ), ldab-1, one,ab( kd+1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the lower triangle of a13 into the work array. + do jj = 1, i3 + do ii = jj, ib + work( ii, jj ) = ab( ii-jj+1, jj+i+kd-1 ) + end do + end do + ! update a13 (in the work array). + call stdlib_ctrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i3, cone,ab( kd+1, i ), ldab-1, work, ldwork ) + ! update a23 + if( i2>0 )call stdlib_cgemm( 'CONJUGATE TRANSPOSE','NO TRANSPOSE', i2, & + i3, ib, -cone,ab( kd+1-ib, i+ib ), ldab-1, work,ldwork, cone, ab( 1+ib, & + i+kd ),ldab-1 ) + ! update a33 + call stdlib_cherk( 'UPPER', 'CONJUGATE TRANSPOSE', i3, ib,-one, work, & + ldwork, one,ab( kd+1, i+kd ), ldab-1 ) + ! copy the lower triangle of a13 back into place. + do jj = 1, i3 + do ii = jj, ib + ab( ii-jj+1, jj+i+kd-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_70 + else + ! compute the cholesky factorization of a hermitian band + ! matrix, given the lower triangle of the matrix in band + ! storage. + ! zero the lower triangle of the work array. + do j = 1, nb + do i = j + 1, nb + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_140: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_cpotf2( uplo, ib, ab( 1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 + ! a21 a22 + ! a31 a32 a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a21, a22 and + ! a32 are empty if ib = kd. the lower triangle of a31 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a21 + call stdlib_ctrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i2,ib, cone, ab( 1, i ), ldab-1,ab( 1+ib, i ), ldab-1 ) + ! update a22 + call stdlib_cherk( 'LOWER', 'NO TRANSPOSE', i2, ib, -one,ab( 1+ib, i ), & + ldab-1, one,ab( 1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the upper triangle of a31 into the work array. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + work( ii, jj ) = ab( kd+1-jj+ii, jj+i-1 ) + end do + end do + ! update a31 (in the work array). + call stdlib_ctrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i3,ib, cone, ab( 1, i ), ldab-1, work,ldwork ) + ! update a32 + if( i2>0 )call stdlib_cgemm( 'NO TRANSPOSE','CONJUGATE TRANSPOSE', i3, & + i2, ib,-cone, work, ldwork, ab( 1+ib, i ),ldab-1, cone, ab( 1+kd-ib, i+& + ib ),ldab-1 ) + ! update a33 + call stdlib_cherk( 'LOWER', 'NO TRANSPOSE', i3, ib, -one,work, ldwork, & + one, ab( 1, i+kd ),ldab-1 ) + ! copy the upper triangle of a31 back into place. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + ab( kd+1-jj+ii, jj+i-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_140 + end if + end if + return + 150 continue + return + end subroutine stdlib_cpbtrf + + !> CPFTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by CPFTRF. + + pure subroutine stdlib_cpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(sp), intent(in) :: a(0:*) + complex(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, normaltransr + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb CPOCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite matrix using the + !> Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cpocon( uplo, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(sp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda CPORFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite, + !> and provides error bounds and backward error estimates for the + !> solution. + + pure subroutine stdlib_cporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cpotrs( uplo, n, 1, af, ldaf, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_cpotrs( uplo, n, 1, af, ldaf, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cpotrs( uplo, n, 1, af, ldaf, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cporfs + + !> CPOTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_cpotrf( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code. + call stdlib_cpotrf2( uplo, n, a, lda, info ) + else + ! use blocked code. + if( upper ) then + ! compute the cholesky factorization a = u**h *u. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_cherk( 'UPPER', 'CONJUGATE TRANSPOSE', jb, j-1,-one, a( 1, j ), & + lda, one, a( j, j ), lda ) + call stdlib_cpotrf2( 'UPPER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block row. + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', jb,n-j-jb+1, j-1,& + -cone, a( 1, j ), lda,a( 1, j+jb ), lda, cone, a( j, j+jb ),lda ) + call stdlib_ctrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', jb, & + n-j-jb+1, cone, a( j, j ),lda, a( j, j+jb ), lda ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_cherk( 'LOWER', 'NO TRANSPOSE', jb, j-1, -one,a( j, 1 ), lda, one,& + a( j, j ), lda ) + call stdlib_cpotrf2( 'LOWER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block column. + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j-jb+1, jb, j-1,& + -cone, a( j+jb, 1 ),lda, a( j, 1 ), lda, cone, a( j+jb, j ),lda ) + call stdlib_ctrsm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', n-j-& + jb+1, jb, cone, a( j, j ),lda, a( j+jb, j ), lda ) + end if + end do + end if + end if + go to 40 + 30 continue + info = info + j - 1 + 40 continue + return + end subroutine stdlib_cpotrf + + !> CPOTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPOTRF. + + pure subroutine stdlib_cpotri( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 )return + ! form inv(u) * inv(u)**h or inv(l)**h * inv(l). + call stdlib_clauum( uplo, n, a, lda, info ) + return + end subroutine stdlib_cpotri + + !> CPPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite packed matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> CPPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cppcon( uplo, n, ap, anorm, rcond, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(sp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm CPPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_cpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cpptrs( uplo, n, 1, afp, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_cpptrs( uplo, n, 1, afp, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cpptrs( uplo, n, 1, afp, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cpprfs + + !> CPPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_cppsv( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb CPPSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cppsvx( fact, uplo, n, nrhs, ap, afp, equed, s, b, ldb,x, ldx, rcond, ferr,& + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(inout) :: s(*) + complex(sp), intent(inout) :: afp(*), ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(sp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( stdlib_lsame( fact, 'F' ) .and. .not.( rcequ .or. stdlib_lsame( equed, 'N' ) )& + ) then + info = -7 + else + if( rcequ ) then + smin = bignum + smax = zero + do j = 1, n + smin = min( smin, s( j ) ) + smax = max( smax, s( j ) ) + end do + if( smin<=zero ) then + info = -8 + else if( n>0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_cppcon( uplo, n, afp, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cpptrs( uplo, n, nrhs, afp, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_cpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr, berr,work, rwork, & + info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CPPTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPPTRF. + + pure subroutine stdlib_cpptri( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj, jjn + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CPPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_ctptri( uplo, 'NON-UNIT', n, ap, info ) + if( info>0 )return + if( upper ) then + ! compute the product inv(u) * inv(u)**h. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + if( j>1 )call stdlib_chpr( 'UPPER', j-1, one, ap( jc ), 1, ap ) + ajj = real( ap( jj ),KIND=sp) + call stdlib_csscal( j, ajj, ap( jc ), 1 ) + end do + else + ! compute the product inv(l)**h * inv(l). + jj = 1 + do j = 1, n + jjn = jj + n - j + 1 + ap( jj ) = real( stdlib_cdotc( n-j+1, ap( jj ), 1, ap( jj ), 1 ),KIND=sp) + if( j CPTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using SPTTRF and then calling CBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band positive definite Hermitian matrix + !> can also be found if CHETRD, CHPTRD, or CHBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to + !> tridiagonal form, however, may preclude the possibility of obtaining + !> high relative accuracy in the small eigenvalues of the original + !> matrix, if these eigenvalues range over many orders of magnitude.) + + pure subroutine stdlib_cpteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + ! ==================================================================== + + ! Local Arrays + complex(sp) :: c(1,1), vt(1,1) + ! Local Scalars + integer(ilp) :: i, icompz, nru + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldz0 )z( 1, 1 ) = cone + return + end if + if( icompz==2 )call stdlib_claset( 'FULL', n, n, czero, cone, z, ldz ) + ! call stdlib_spttrf to factor the matrix. + call stdlib_spttrf( n, d, e, info ) + if( info/=0 )return + do i = 1, n + d( i ) = sqrt( d( i ) ) + end do + do i = 1, n - 1 + e( i ) = e( i )*d( i ) + end do + ! call stdlib_cbdsqr to compute the singular values/vectors of the + ! bidiagonal factor. + if( icompz>0 ) then + nru = n + else + nru = 0 + end if + call stdlib_cbdsqr( 'LOWER', n, 0, nru, 0, d, e, vt, 1, z, ldz, c, 1,work, info ) + + ! square the singular values. + if( info==0 ) then + do i = 1, n + d( i ) = d( i )*d( i ) + end do + else + info = n + info + end if + return + end subroutine stdlib_cpteqr + + !> CPTTRS: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_cpttrs( uplo, n, nrhs, d, e, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: d(*) + complex(sp), intent(inout) :: b(ldb,*) + complex(sp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: iuplo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments. + info = 0 + upper = ( uplo=='U' .or. uplo=='U' ) + if( .not.upper .and. .not.( uplo=='L' .or. uplo=='L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_cptts2( iuplo, n, nrhs, d, e, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_cptts2( iuplo, n, jb, d, e, b( 1, j ), ldb ) + end do + end if + return + end subroutine stdlib_cpttrs + + !> CSPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric packed matrix A using the + !> factorization A = U*D*U**T or A = L*D*L**T computed by CSPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cspcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_csptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_cspcon + + !> CSPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_csprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_csptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_csptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_csptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_csprfs + + !> CSPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is symmetric and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_cspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb CSPSVX: uses the diagonal pivoting factorization A = U*D*U**T or + !> A = L*D*L**T to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cspsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(inout) :: afp(*) + complex(sp), intent(in) :: ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clansp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_cspcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_csptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_csprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.CSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + !> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to + !> real symmetric tridiagonal form. + !> (Any complex Hermitean tridiagonal matrix has real values on its diagonal + !> and potentially complex numbers on its off-diagonals. By applying a + !> similarity transform with an appropriate diagonal matrix + !> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean + !> matrix can be transformed into a real symmetric matrix and complex + !> arithmetic can be entirely avoided.) + !> While the eigenvectors of the real symmetric tridiagonal matrix are real, + !> the eigenvectors of original complex Hermitean matrix have complex entries + !> in general. + !> Since LAPACK drivers overwrite the matrix data with the eigenvectors, + !> CSTEMR accepts complex workspace to facilitate interoperability + !> with CUNMTR or CUPMTR. + + pure subroutine stdlib_cstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*) + complex(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: minrgp = 3.0e-3_sp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(sp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_sstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_slarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_clarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_slarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_slae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_slaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_slarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_slanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_sscal( n, scale, d, 1 ) + call stdlib_sscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_slarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_slarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_scopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_slarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_slarre computes the eigenvalues to less than full precision. + ! stdlib_clarrv will refine the eigenvalue approximations, and we only + ! need less accurate initial bisection in stdlib_slarre. + ! note: these settings do only affect the subset case and stdlib_slarre + rtol1 = max( sqrt(eps)*5.0e-2_sp, four * eps ) + rtol2 = max( sqrt(eps)*5.0e-3_sp, four * eps ) + endif + call stdlib_slarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_slarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_clarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_slarre computes eigenvalues of the (shifted) root representation + ! stdlib_clarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_slarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_slasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) CSYCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSYTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_csycon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_csytrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_csycon + + !> CSYCON_ROOK: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by CSYTRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_csycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==czero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_csytrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_csycon_rook + + !> CSYRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_csyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_csytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_csytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_csytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_csyrfs + + !> CSYSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_csysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CSYSV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> CSYTRF_RK is called to compute the factorization of a complex + !> symmetric matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine CSYTRS_3. + + pure subroutine stdlib_csysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CSYSV_ROOK: computes the solution to a complex system of linear + !> equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> CSYTRF_ROOK is called to compute the factorization of a complex + !> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling CSYTRS_ROOK. + + pure subroutine stdlib_csysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_csysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: af(ldaf,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clansy( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_csycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_csytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_csyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CTBCON: estimates the reciprocal of the condition number of a + !> triangular band matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ctbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: ab(ldab,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab 0. + if( anorm>zero ) then + ! estimate the 1-norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_clatbs( uplo, 'NO TRANSPOSE', diag, normin, n, kd,ab, ldab, work, & + scale, rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_clatbs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, kd, ab, ldab,& + work, scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_icamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale CTFTRI: computes the inverse of a triangular matrix A stored in RFP + !> format. + !> This is a Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_ctftri( transr, uplo, diag, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo, diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_ctrtri( 'L', diag, n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_ctrmm( 'R', 'L', 'N', diag, n2, n1, -cone, a( 0 ),n, a( n1 ), n ) + + call stdlib_ctrtri( 'U', diag, n2, a( n ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ctrmm( 'L', 'U', 'C', diag, n2, n1, cone, a( n ), n,a( n1 ), n ) + + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_ctrtri( 'L', diag, n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_ctrmm( 'L', 'L', 'C', diag, n1, n2, -cone, a( n2 ),n, a( 0 ), n ) + + call stdlib_ctrtri( 'U', diag, n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ctrmm( 'R', 'U', 'N', diag, n1, n2, cone, a( n1 ),n, a( 0 ), n ) + + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_ctrtri( 'U', diag, n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_ctrmm( 'L', 'U', 'N', diag, n1, n2, -cone, a( 0 ),n1, a( n1*n1 ), & + n1 ) + call stdlib_ctrtri( 'L', diag, n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ctrmm( 'R', 'L', 'C', diag, n1, n2, cone, a( 1 ),n1, a( n1*n1 ), & + n1 ) + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_ctrtri( 'U', diag, n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_ctrmm( 'R', 'U', 'C', diag, n2, n1, -cone,a( n2*n2 ), n2, a( 0 ), & + n2 ) + call stdlib_ctrtri( 'L', diag, n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ctrmm( 'L', 'L', 'N', diag, n2, n1, cone,a( n1*n2 ), n2, a( 0 ), & + n2 ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_ctrtri( 'L', diag, k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_ctrmm( 'R', 'L', 'N', diag, k, k, -cone, a( 1 ),n+1, a( k+1 ), n+& + 1 ) + call stdlib_ctrtri( 'U', diag, k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ctrmm( 'L', 'U', 'C', diag, k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 & + ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_ctrtri( 'L', diag, k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_ctrmm( 'L', 'L', 'C', diag, k, k, -cone, a( k+1 ),n+1, a( 0 ), n+& + 1 ) + call stdlib_ctrtri( 'U', diag, k, a( k ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ctrmm( 'R', 'U', 'N', diag, k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_ctrtri( 'U', diag, k, a( k ), k, info ) + if( info>0 )return + call stdlib_ctrmm( 'L', 'U', 'N', diag, k, k, -cone, a( k ), k,a( k*( k+1 ) ),& + k ) + call stdlib_ctrtri( 'L', diag, k, a( 0 ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ctrmm( 'R', 'L', 'C', diag, k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_ctrtri( 'U', diag, k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_ctrmm( 'R', 'U', 'C', diag, k, k, -cone,a( k*( k+1 ) ), k, a( 0 ),& + k ) + call stdlib_ctrtri( 'L', diag, k, a( k*k ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ctrmm( 'L', 'L', 'N', diag, k, k, cone, a( k*k ), k,a( 0 ), k ) + + end if + end if + end if + return + end subroutine stdlib_ctftri + + !> CTGSJA: computes the generalized singular value decomposition (GSVD) + !> of two complex upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine CGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), + !> where U, V and Q are unitary matrices. + !> R is a nonsingular upper triangular matrix, and D1 + !> and D2 are ``diagonal'' matrices, which are of the following + !> structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the unitary transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_ctgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(sp), intent(in) :: tola, tolb + ! Array Arguments + real(sp), intent(out) :: alpha(*), beta(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(sp), parameter :: hugenum = huge(zero) + + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(sp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin + complex(sp) :: a2, b2, snq, snu, snv + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,real,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + if( gamma=beta( k+i ) ) then + call stdlib_csscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_csscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l CTGSY2: solves the generalized Sylvester equation + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, + !> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, + !> N-by-N and M-by-N, respectively. A, B, D and E are upper triangular + !> (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation solving equation (1) corresponds to solve + !> Zx = scale * b, where Z is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Ik is the identity matrix of size k and X**H is the transpose of X. + !> kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case is used to compute an estimate of Dif[(A, D), (B, E)] = + !> = sigma_min(Z) using reverse communication with CLACON. + !> CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL + !> of an upper bound on the separation between to matrix pairs. Then + !> the input (A, D), (B, E) are sub-pencils of two matrix pairs in + !> CTGSYL. + + pure subroutine stdlib_ctgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, rdsum, rdscal,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: rdscal, rdsum + real(sp), intent(out) :: scale + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(sp), intent(inout) :: c(ldc,*), f(ldf,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldz = 2 + + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, k + real(sp) :: scaloc + complex(sp) :: alpha + ! Local Arrays + integer(ilp) :: ipiv(ldz), jpiv(ldz) + complex(sp) :: rhs(ldz), z(ldz,ldz) + ! Intrinsic Functions + intrinsic :: cmplx,conjg,max + ! Executable Statements + ! decode and test input parameters + info = 0 + ierr = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>2 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda0 )info = ierr + if( ijob==0 ) then + call stdlib_cgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + else + call stdlib_clatdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + if( i>1 ) then + alpha = -rhs( 1 ) + call stdlib_caxpy( i-1, alpha, a( 1, i ), 1, c( 1, j ), 1 ) + call stdlib_caxpy( i-1, alpha, d( 1, i ), 1, f( 1, j ), 1 ) + end if + if( j0 )info = ierr + call stdlib_cgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + do k = 1, j - 1 + f( i, k ) = f( i, k ) + rhs( 1 )*conjg( b( k, j ) ) +rhs( 2 )*conjg( e( k, & + j ) ) + end do + do k = i + 1, m + c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1 ) -conjg( d( i, k ) )& + *rhs( 2 ) + end do + end do loop_70 + end do loop_80 + end if + return + end subroutine stdlib_ctgsy2 + + !> CTGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with complex entries. A, B, D and E are upper + !> triangular (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 + !> is an output scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z + !> is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Here Ix is the identity matrix of size x and X**H is the conjugate + !> transpose of X. Kron(X, Y) is the Kronecker product between the + !> matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case (TRANS = 'C') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using CLACON. + !> If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of + !> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. + !> This is a level-3 BLAS algorithm. + + pure subroutine stdlib_ctgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + complex(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(sp), intent(inout) :: c(ldc,*), f(ldf,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_ccopy by calls to stdlib_claset. + ! sven hammarling, 1/5/02. + + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, pq, q + real(sp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: cmplx,max,real,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_claset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_claset( 'F', m, n, czero, czero, f, ldf ) + else if( ijob>=1 .and. notran ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + ! use unblocked level 2 solver + loop_30: do iround = 1, isolve + scale = one + dscale = zero + dsum = one + pq = m*n + call stdlib_ctgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_clacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_clacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_claset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_claset( 'F', m, n, czero, czero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_clacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_clacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j) - subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q + pq = 0 + scale = one + dscale = zero + dsum = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + call stdlib_ctgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,linfo ) + if( linfo>0 )info = linfo + pq = pq + mb*nb + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + do k = js, je + call stdlib_cscal( is-1, cmplx( scaloc, zero,KIND=sp),c( 1, k ), 1 ) + + call stdlib_cscal( is-1, cmplx( scaloc, zero,KIND=sp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),c( ie+1, k ), & + 1 ) + call stdlib_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),f( ie+1, k ), & + 1 ) + end do + do k = je + 1, n + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( i>1 ) then + call stdlib_cgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=sp), a(& + 1, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=sp),c( 1, js ), & + ldc ) + call stdlib_cgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=sp), d(& + 1, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=sp),f( 1, js ), & + ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + do k = js, je + call stdlib_cscal( is-1, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( is-1, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + do k = js, je + call stdlib_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),c( ie+1, k ), 1 ) + + call stdlib_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),f( ie+1, k ), 1 ) + + end do + do k = je + 1, n + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1, k ),1 ) + + call stdlib_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( j>p+2 ) then + call stdlib_cgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=sp), c( is,& + js ), ldc,b( 1, js ), ldb, cmplx( one, zero,KIND=sp),f( is, 1 ), ldf ) + + call stdlib_cgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=sp), f( is,& + js ), ldf,e( 1, js ), lde, cmplx( one, zero,KIND=sp),f( is, 1 ), ldf ) + + end if + if( i

CTPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ctpcon( norm, uplo, diag, n, ap, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CTPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_slamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=sp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_clantp( norm, uplo, diag, n, ap, rwork ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_clatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_clatps( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, ap, work, & + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_icamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale CTPLQT: computes a blocked LQ factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_ctplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ctplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_ctprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_ctplqt + + !> CTPMLQT: applies a complex unitary matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_ctpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + complex(sp), intent(in) :: v(ldv,*), t(ldt,*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_ctprfb( 'L', 'C', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ctprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_ctprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ctprfb( 'R', 'C', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_ctpmlqt + + !> CTPMQRT: applies a complex orthogonal matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_ctpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + complex(sp), intent(in) :: v(ldv,*), t(ldt,*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ctprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_ctprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ctprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_ctprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_ctpmqrt + + !> CTPQRT: computes a blocked QR factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_ctpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ctpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**h to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_ctprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_ctpqrt + + !> CTRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ctrcon( norm, uplo, diag, n, a, lda, rcond, work,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_clatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_clatrs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, a, lda, work,& + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_icamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale CTRSYL: solves the complex Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**H, and A and B are both upper triangular. A is + !> M-by-M and B is N-by-N; the right hand side C and the solution X are + !> M-by-N; and scale is an output scale factor, set <= 1 to avoid + !> overflow in X. + + subroutine stdlib_ctrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(sp), intent(out) :: scale + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: j, k, l + real(sp) :: bignum, da11, db, eps, scaloc, sgn, smin, smlnum + complex(sp) :: a11, suml, sumr, vec, x11 + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_cladiv( vec*cmplx( scaloc,KIND=sp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_csscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_30 + else if( .not.notrna .and. notrnb ) then + ! solve a**h *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 l-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + loop_60: do l = 1, n + do k = 1, m + suml = stdlib_cdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_cdotu( l-1, c( k, 1 ), ldc, b( 1, l ), 1 ) + vec = c( k, l ) - ( suml+sgn*sumr ) + scaloc = one + a11 = conjg( a( k, k ) ) + sgn*b( l, l ) + da11 = abs( real( a11,KIND=sp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=sp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_cladiv( vec*cmplx( scaloc,KIND=sp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_csscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_60 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**h*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! upper-right corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + + ! i=1 + ! n + ! isgn*sum [x(k,j)*b**h(l,j)]. + ! j=l+1 + loop_90: do l = n, 1, -1 + do k = 1, m + suml = stdlib_cdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_cdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = conjg( a( k, k )+sgn*b( l, l ) ) + da11 = abs( real( a11,KIND=sp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=sp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_cladiv( vec*cmplx( scaloc,KIND=sp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_csscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_90 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! bottom-left corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b**h(l,j)] + ! i=k+1 j=l+1 + loop_120: do l = n, 1, -1 + do k = m, 1, -1 + suml = stdlib_cdotu( m-k, a( k, min( k+1, m ) ), lda,c( min( k+1, m ), l ), 1 & + ) + sumr = stdlib_cdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = a( k, k ) + sgn*conjg( b( l, l ) ) + da11 = abs( real( a11,KIND=sp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=sp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_cladiv( vec*cmplx( scaloc,KIND=sp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_csscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_120 + end if + return + end subroutine stdlib_ctrsyl + + !> CUNBDB5: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then some other vector from the orthogonal complement + !> is returned. This vector is chosen in an arbitrary but deterministic + !> way. + + pure subroutine stdlib_cunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, j + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB5', -info ) + return + end if + ! project x onto the orthogonal complement of q + call stdlib_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & + childinfo ) + ! if the projection is nonzero, then return + if( stdlib_scnrm2(m1,x1,incx1) /= czero.or. stdlib_scnrm2(m2,x2,incx2) /= czero ) & + then + return + end if + ! project each standard basis vector e_1,...,e_m1 in turn, stopping + ! when a nonzero projection is found + do i = 1, m1 + do j = 1, m1 + x1(j) = czero + end do + x1(i) = cone + do j = 1, m2 + x2(j) = czero + end do + call stdlib_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_scnrm2(m1,x1,incx1) /= czero.or. stdlib_scnrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, + ! stopping when a nonzero projection is found + do i = 1, m2 + do j = 1, m1 + x1(j) = czero + end do + do j = 1, m2 + x2(j) = czero + end do + x2(i) = cone + call stdlib_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_scnrm2(m1,x1,incx1) /= czero.or. stdlib_scnrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + return + end subroutine stdlib_cunbdb5 + + !> CUNCSD: computes the CS decomposition of an M-by-M partitioned + !> unitary matrix X: + !> [ I 0 0 | 0 0 0 ] + !> [ 0 C 0 | 0 -S 0 ] + !> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H + !> X = [-----------] = [---------] [---------------------] [---------] . + !> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] + !> [ 0 S 0 | 0 C 0 ] + !> [ 0 0 I | 0 0 0 ] + !> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, + !> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are + !> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in + !> which R = MIN(P,M-P,Q,M-Q). + + recursive subroutine stdlib_cuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & + ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & + work, lwork, rwork, lrwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & + lrwork, lwork, m, p, q + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: theta(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) + + complex(sp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! =================================================================== + + ! Local Scalars + character :: transt, signst + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & + lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & + lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & + lworkopt, p1, q1 + logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t + integer(ilp) :: lrworkmin, lrworkopt + logical(lk) :: lrquery + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + defaultsigns = .not. stdlib_lsame( signs, 'O' ) + lquery = lwork == -1 + lrquery = lrwork == -1 + if( m < 0 ) then + info = -7 + else if( p < 0 .or. p > m ) then + info = -8 + else if( q < 0 .or. q > m ) then + info = -9 + else if ( colmajor .and. ldx11 < max( 1, p ) ) then + info = -11 + else if (.not. colmajor .and. ldx11 < max( 1, q ) ) then + info = -11 + else if (colmajor .and. ldx12 < max( 1, p ) ) then + info = -13 + else if (.not. colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -13 + else if (colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -15 + else if (.not. colmajor .and. ldx21 < max( 1, q ) ) then + info = -15 + else if (colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -17 + else if (.not. colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -17 + else if( wantu1 .and. ldu1 < p ) then + info = -20 + else if( wantu2 .and. ldu2 < m-p ) then + info = -22 + else if( wantv1t .and. ldv1t < q ) then + info = -24 + else if( wantv2t .and. ldv2t < m-q ) then + info = -26 + end if + ! work with transpose if convenient + if( info == 0 .and. min( p, m-p ) < min( q, m-q ) ) then + if( colmajor ) then + transt = 'T' + else + transt = 'N' + end if + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_cuncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & + ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& + u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) + return + end if + ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if + ! convenient + if( info == 0 .and. m-q < q ) then + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_cuncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & + ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & + v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) + return + end if + ! compute workspace + if( info == 0 ) then + ! real workspace + iphi = 2 + ib11d = iphi + max( 1, q - 1 ) + ib11e = ib11d + max( 1, q ) + ib12d = ib11e + max( 1, q - 1 ) + ib12e = ib12d + max( 1, q ) + ib21d = ib12e + max( 1, q - 1 ) + ib21e = ib21d + max( 1, q ) + ib22d = ib21e + max( 1, q - 1 ) + ib22e = ib22d + max( 1, q ) + ibbcsd = ib22e + max( 1, q - 1 ) + call stdlib_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & + theta, theta, rwork, -1, childinfo ) + lbbcsdworkopt = int( rwork(1),KIND=ilp) + lbbcsdworkmin = lbbcsdworkopt + lrworkopt = ibbcsd + lbbcsdworkopt - 1 + lrworkmin = ibbcsd + lbbcsdworkmin - 1 + rwork(1) = lrworkopt + ! complex workspace + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m - p ) + itauq2 = itauq1 + max( 1, q ) + iorgqr = itauq2 + max( 1, m - q ) + call stdlib_cungqr( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorgqrworkopt = int( work(1),KIND=ilp) + lorgqrworkmin = max( 1, m - q ) + iorglq = itauq2 + max( 1, m - q ) + call stdlib_cunglq( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorglqworkopt = int( work(1),KIND=ilp) + lorglqworkmin = max( 1, m - q ) + iorbdb = itauq2 + max( 1, m - q ) + call stdlib_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, theta, u1, u2,v1t, v2t, work, -1, childinfo ) + lorbdbworkopt = int( work(1),KIND=ilp) + lorbdbworkmin = lorbdbworkopt + lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & + lorbdbworkopt ) - 1 + lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & + lorbdbworkmin ) - 1 + work(1) = max(lworkopt,lworkmin) + if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -22 + else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -24 + else + lorgqrwork = lwork - iorgqr + 1 + lorglqwork = lwork - iorglq + 1 + lorbdbwork = lwork - iorbdb + 1 + lbbcsdwork = lrwork - ibbcsd + 1 + end if + end if + ! abort if any illegal arguments + if( info /= 0 ) then + call stdlib_xerbla( 'CUNCSD', -info ) + return + else if( lquery .or. lrquery ) then + return + end if + ! transform to bidiagonal block form + call stdlib_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & + ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& + iorbdb), lorbdbwork, childinfo ) + ! accumulate householder reflectors + if( colmajor ) then + if( wantu1 .and. p > 0 ) then + call stdlib_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_clacpy( 'U', q-1, q-1, x11(1,2), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_cunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglqwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + call stdlib_clacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) + if( m-p > q ) then + call stdlib_clacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & + ldv2t ) + end if + if( m > q ) then + call stdlib_cunglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & + lorglqwork, info ) + end if + end if + else + if( wantu1 .and. p > 0 ) then + call stdlib_clacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) + call stdlib_cunglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_clacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) + call stdlib_cunglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_clacpy( 'L', q-1, q-1, x11(2,1), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_cungqr( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorgqr), & + lorgqrwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + p1 = min( p+1, m ) + q1 = min( q+1, m ) + call stdlib_clacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) + if ( m > p+q ) then + call stdlib_clacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) + + end if + call stdlib_cungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & + lorgqrwork, info ) + end if + end if + ! compute the csd of the matrix in bidiagonal-block form + call stdlib_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & + u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& + rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& + lbbcsdwork, info ) + ! permute rows and columns to place identity submatrices in top- + ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- + ! block and/or bottom-right corner of (2,1)-block and/or top-left + ! corner of (2,2)-block + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + if( colmajor ) then + call stdlib_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) + else + call stdlib_clapmr( .false., m-p, m-p, u2, ldu2, iwork ) + end if + end if + if( m > 0 .and. wantv2t ) then + do i = 1, p + iwork(i) = m - p - q + i + end do + do i = p + 1, m - q + iwork(i) = i - p + end do + if( .not. colmajor ) then + call stdlib_clapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) + else + call stdlib_clapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) + end if + end if + return + ! end stdlib_cuncsd + end subroutine stdlib_cuncsd + + !> CUNGHR: generates a complex unitary matrix Q which is defined as the + !> product of IHI-ILO elementary reflectors of order N, as returned by + !> CGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_cunghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lwkopt, nb, nh + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda0 ) then + ! generate q(ilo+1:ihi,ilo+1:ihi) + call stdlib_cungqr( nh, nh, nh, a( ilo+1, ilo+1 ), lda, tau( ilo ),work, lwork, & + iinfo ) + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunghr + + !> CUNGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> CHETRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_cungtr( uplo, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, j, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + ! generate q(2:n,2:n) + call stdlib_cungqr( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cungtr + + !> CUNHR_COL: takes an M-by-N complex matrix Q_in with orthonormal columns + !> as input, stored in A, and performs Householder Reconstruction (HR), + !> i.e. reconstructs Householder vectors V(i) implicitly representing + !> another M-by-N matrix Q_out, with the property that Q_in = Q_out*S, + !> where S is an N-by-N diagonal matrix with diagonal entries + !> equal to +1 or -1. The Householder vectors (columns V(i) of V) are + !> stored in A on output, and the diagonal entries of S are stored in D. + !> Block reflectors are also returned in T + !> (same output format as CGEQRT). + + pure subroutine stdlib_cunhr_col( m, n, nb, a, lda, t, ldt, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: d(*), t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( nb<1 ) then + info = -3 + else if( ldan ) then + call stdlib_ctrsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1 ), lda ) + + end if + ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n) + ! as a sequence of upper-triangular blocks with nb-size column + ! blocking. + ! loop over the column blocks of size nb of the array a(1:m,1:n) + ! and the array t(1:nb,1:n), jb is the column index of a column + ! block, jnb is the column block size at each step jb. + nplusone = n + 1 + do jb = 1, n, nb + ! (2-0) determine the column block size jnb. + jnb = min( nplusone-jb, nb ) + ! (2-1) copy the upper-triangular part of the current jnb-by-jnb + ! diagonal block u(jb) (of the n-by-n matrix u) stored + ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part + ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1) + ! column-by-column, total jnb*(jnb+1)/2 elements. + jbtemp1 = jb - 1 + do j = jb, jb+jnb-1 + call stdlib_ccopy( j-jbtemp1, a( jb, j ), 1, t( 1, j ), 1 ) + end do + ! (2-2) perform on the upper-triangular part of the current + ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored + ! in t(1:jnb,jb:jb+jnb-1) the following operation in place: + ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper- + ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication + ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb + ! diagonal block s(jb) of the n-by-n sign matrix s from the + ! right means changing the sign of each j-th column of the block + ! u(jb) according to the sign of the diagonal element of the block + ! s(jb), i.e. s(j,j) that is stored in the array element d(j). + do j = jb, jb+jnb-1 + if( d( j )==cone ) then + call stdlib_cscal( j-jbtemp1, -cone, t( 1, j ), 1 ) + end if + end do + ! (2-3) perform the triangular solve for the current block + ! matrix x(jb): + ! x(jb) * (a(jb)**t) = b(jb), where: + ! a(jb)**t is a jnb-by-jnb unit upper-triangular + ! coefficient block, and a(jb)=v1(jb), which + ! is a jnb-by-jnb unit lower-triangular block + ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1). + ! the n-by-n matrix v1 is the upper part + ! of the m-by-n lower-trapezoidal matrix v + ! stored in a(1:m,1:n); + ! b(jb) is a jnb-by-jnb upper-triangular right-hand + ! side block, b(jb) = (-1)*u(jb)*s(jb), and + ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1); + ! x(jb) is a jnb-by-jnb upper-triangular solution + ! block, x(jb) is the upper-triangular block + ! reflector t(jb), and x(jb) is stored + ! in t(1:jnb,jb:jb+jnb-1). + ! in other words, we perform the triangular solve for the + ! upper-triangular block t(jb): + ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb). + ! even though the blocks x(jb) and b(jb) are upper- + ! triangular, the routine stdlib_ctrsm will access all jnb**2 + ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore, + ! we need to set to zero the elements of the block + ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call + ! to stdlib_ctrsm. + ! (2-3a) set the elements to zero. + jbtemp2 = jb - 2 + do j = jb, jb+jnb-2 + do i = j-jbtemp2, nb + t( i, j ) = czero + end do + end do + ! (2-3b) perform the triangular solve. + call stdlib_ctrsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1, jb ), & + ldt ) + end do + return + end subroutine stdlib_cunhr_col + + !> CUNMHR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> IHI-ILO elementary reflectors, as returned by CGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_cunmhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: ihi, ilo, lda, ldc, lwork, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, nh, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + left = stdlib_lsame( side, 'L' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 .or. ilo>max( 1, nq ) ) then + info = -5 + else if( ihinq ) then + info = -6 + else if( lda CUNMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by CHETRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_cunmtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery, upper + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda CUPGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> CHPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_cupgtr( uplo, n, ap, tau, q, ldq, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, n + ! Array Arguments + complex(sp), intent(in) :: ap(*), tau(*) + complex(sp), intent(out) :: q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, ij, j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldq1 ) then + ! generate q(2:n,2:n) + call stdlib_cung2r( n-1, n-1, n-1, q( 2, 2 ), ldq, tau, work,iinfo ) + end if + end if + return + end subroutine stdlib_cupgtr + + !> CUPMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by CHPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_cupmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, m, n + ! Array Arguments + complex(sp), intent(inout) :: ap(*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: forwrd, left, notran, upper + integer(ilp) :: i, i1, i2, i3, ic, ii, jc, mi, ni, nq + complex(sp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldc CGBBRD: reduces a complex general m-by-n band matrix A to real upper + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> The routine computes B, and optionally forms Q or P**H, or computes + !> Q**H*C for a given matrix C. + + pure subroutine stdlib_cgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(sp), intent(out) :: d(*), e(*), rwork(*) + complex(sp), intent(inout) :: ab(ldab,*), c(ldc,*) + complex(sp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& + mu0, nr, nrt + real(sp) :: abst, rc + complex(sp) :: ra, rb, rs, t + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the complex sines of the plane rotations are stored in work, + ! and the real cosines in rwork. + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_clargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + rwork( j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_clartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_clartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & + work( i+ml-1 ), ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = rwork( j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_clargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + rwork( j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_clartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_clartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & + i+mu-1 ), work( i+mu-1 ), ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_crot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,rwork( i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**h + do j = j1, j2, kb1 + call stdlib_crot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, rwork(& + j+kun ),conjg( work( j+kun ) ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to complex lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, overwriting superdiagonal + ! elements on subdiagonal elements + do i = 1, min( m-1, n ) + call stdlib_clartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + ab( 1, i ) = ra + if( i0 .and. m1 ) then + rb = -conjg( rs )*ab( ku, i ) + ab( ku, i ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_crot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, & + conjg( rs ) ) + end do + end if + end if + ! make diagonal and superdiagonal elements real, storing them in d + ! and e + t = ab( ku+1, 1 ) + loop_120: do i = 1, minmn + abst = abs( t ) + d( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( wantq )call stdlib_cscal( m, t, q( 1, i ), 1 ) + if( wantc )call stdlib_cscal( ncc, conjg( t ), c( i, 1 ), ldc ) + if( i CGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_cgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv, work, n,info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_cgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cgbtrs( transn, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cgbrfs + + !> CGBSV: computes the solution to a complex system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_cgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb CGBSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(inout) :: c(*), r(*) + complex(sp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + ! moved setting of info = n+1 so info does not subsequently get + ! overwritten. sven, 17 mar 05. + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(sp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_clantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_clangb( norm, n, kl, ku, ab, ldab, rwork ) + rpvgrw = stdlib_clantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_clangb( 'M', n, kl, ku, ab, ldab, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_cgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, rwork, info ) + + ! compute the solution matrix x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_cgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CGEBRD: reduces a general complex M-by-N matrix A to upper or lower + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_cgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'CGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=sp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+ib-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_clabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+ib:m,i+ib:n), using + ! an update of the form a := a - v*y**h - x*u**h + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& + cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) + + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_cgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_cgebrd + + !> CGEHRD: reduces a complex general matrix A to upper Hessenberg form H by + !> an unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_cgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + complex(sp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin+tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**h + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_clahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**h. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = cone + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',ihi, ihi-i-ib+1,ib, -& + cone, work, ldwork, a( i+ib, i ), lda, cone,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_ctrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', i, ib-1,cone, & + a( i+1, i ), lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_caxpy( i, -cone, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, & + n-i-ib+1, ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, & + ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_cgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_cgehrd + + !> CGELQT: computes a blocked LQ factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_cgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. (mb>min(m,n) .and. min(m,n)>0 ))then + info = -3 + else if( lda CGELS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, or its conjugate-transpose, using a QR + !> or LQ factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an underdetermined system A**H * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**H * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_cgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(sp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(sp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'CGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'CUNMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'CUNMQR', 'LC', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'CGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'CUNMLQ', 'LC', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'CUNMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn + max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=sp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'CGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_claset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_slamch( 'S' ) / stdlib_slamch( 'P' ) + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_clange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_clange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_cgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) + call stdlib_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) + call stdlib_ctrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& + ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_cunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_cgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_ctrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) + call stdlib_cunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**h * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_cunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) + call stdlib_ctrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & + b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_clascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_clascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_clascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_clascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=sp) + return + end subroutine stdlib_cgels + + !> CGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_cgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_cgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_cgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb CGEQRT: computes a blocked QR factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_cgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda CGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_cgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cgetrs( trans, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_cgetrs( transt, n, 1, af, ldaf, ipiv, work, n,info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cgetrs( transn, n, 1, af, ldaf, ipiv, work, n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cgerfs + + !> CGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_cgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_cgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_cgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_claswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_claswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_ctrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_cgetrf + + !> CGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_cggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + complex(sp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_ccopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = czero + end do + ! update d1 = d1 - t12*y2 + call stdlib_cgemv( 'NO TRANSPOSE', m, n-m, -cone, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1,& + cone, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_ccopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**h *y + call stdlib_cunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), & + ldb, work( m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_cggglm + + !> CGGHD3: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then CGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of CGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_cgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(sp) :: c + complex(sp) :: c1, c2, ctemp, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'CGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = cmplx( lwkopt,KIND=sp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_claset( 'LOWER', n-1, n-1, czero, czero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = cone + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'CGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'CGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_claset( 'ALL', nblst, nblst, czero, cone, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_claset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_clartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = cmplx( c,KIND=sp) + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + ctemp = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = ctemp*temp - conjg( s )*b( i-1, jj ) + b( i-1, jj ) = s*temp + ctemp*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + c = real( a( j+1+i, j ),KIND=sp) + call stdlib_crot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, c,-& + conjg( b( j+1+i, j ) ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated unitary + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_cgemv( 'CONJUGATE', nblst, len, cone, work,nblst, a( jrow, j+1 & + ), 1, czero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_ctrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_cgemv( 'CONJUGATE', len, nblst-len, cone,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, cone,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated unitary + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_ctrmv( 'UPPER', 'CONJUGATE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_ctrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_cgemv( 'CONJUGATE', nnb, len, cone,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,cone, work( pw ), 1 ) + call stdlib_cgemv( 'CONJUGATE', len, nnb, cone,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, cone,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated unitary matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_cgemm( 'CONJUGATE', 'NO TRANSPOSE', nblst,cola, nblst, cone, work, & + nblst,a( j, jcol+nnb ), lda, czero, work( pw ),nblst ) + call stdlib_clacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_cunm22( 'LEFT', 'CONJUGATE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_cgemm( 'CONJUGATE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, cone, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, czero, work( pw ),2*nnb ) + + call stdlib_clacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated unitary matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, q( & + topq, j ), ldq,work, nblst, czero, work( pw ), nh ) + call stdlib_clacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_cunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, q( topq, j ), ldq,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_clacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_claset( 'ALL', nblst, nblst, czero, cone, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_claset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_claset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,a( jcol + 2, & + jcol ), lda ) + call stdlib_claset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated unitary matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, a( & + 1, j ), lda,work, nblst, czero, work( pw ), top ) + call stdlib_clacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_cunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, a( 1, j ), lda,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_clacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, b( & + 1, j ), ldb,work, nblst, czero, work( pw ), top ) + call stdlib_clacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_cunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, b( 1, j ), ldb,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_clacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated unitary matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, z( & + topq, j ), ldz,work, nblst, czero, work( pw ), nh ) + call stdlib_clacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_cunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, z( topq, j ), ldz,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_clacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol CGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_cgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + complex(sp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_ccopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_cgemv( 'NO TRANSPOSE', n-p, p, -cone, a( 1, n-p+1 ), lda,d, 1, cone, c, & + 1 ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_ccopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_cgemv( 'NO TRANSPOSE', nr, n-m, -cone, a( n-p+1, m+1 ),lda, d(& + nr+1 ), 1, cone, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_ctrmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_caxpy( nr, -cone, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**h*x + call stdlib_cunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', n, 1, p, b, ldb,work( 1 ), x, n, & + work( p+mn+1 ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_cgglse + + !> CGTCON: estimates the reciprocal of the condition number of a complex + !> tridiagonal matrix A using the LU factorization as computed by + !> CGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_cgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: cmplx + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm CGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_cgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cgttrs( trans, n, 1, dlf, df, duf, du2, ipiv, work, n,info ) + call stdlib_caxpy( n, cmplx( one,KIND=sp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_cgttrs( transt, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_cgttrs( transn, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_cgtrfs + + !> CGTSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_cgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + complex(sp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_ccopy( n-1, dl, 1, dlf, 1 ) + call stdlib_ccopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_cgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_clangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_cgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_cgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CHBGST: reduces a complex Hermitian-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**H*S by CPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**H*A*X, where + !> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the + !> bandwidth of A. + + pure subroutine stdlib_chbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldx, n + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(in) :: bb(ldbb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: update, upper, wantx + integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, & + nrt, nx + real(sp) :: bii + complex(sp) :: ra, ra1, t + ! Intrinsic Functions + intrinsic :: conjg,max,min,real + ! Executable Statements + ! test the input parameters + wantx = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + ka1 = ka + 1 + kb1 = kb + 1 + info = 0 + if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldabn-1 )go to 480 + end if + if( upper ) then + ! transform a, working with the upper triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( kb1, i ),KIND=sp) + ab( ka1, i ) = ( real( ab( ka1, i ),KIND=sp) / bii ) / bii + do j = i + 1, i1 + ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, & + i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),& + KIND=sp)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,& + i ) + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j ) + + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_csscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_cgerc( n-m, kbt, -cone, x( m+1, i ), 1,bb( kb1-kbt, i )& + , 1, x( m+1, i-kbt ),ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+ka1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_130: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i,i-k+ka+1) + call stdlib_clartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-& + m ), ra ) + ! create nonzero element a(i-k,i-k+ka+1) outside the + ! band and store it in work(i-k) + t = -bb( kb1-k, i )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1, i-k+ka & + ) + ab( 1, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1, i-k+ka ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j-m )*ab( 1, j+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_clargv( nrt, ab( 1, j2t ), inca, work( j2t-m ), ka1,rwork(& + j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + call stdlib_clacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_crot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), & + conjg( work( j-m ) ) ) + end do + end if + end do loop_130 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kb1-kbt, i )*ra1 + end if + end if + loop_170: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), & + inca, rwork( j2-ka ),work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j )*ab( 1, j+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_clargv( nr, ab( 1, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 ) + + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + call stdlib_clacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_crot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), conjg( & + work( j ) ) ) + end do + end if + end do loop_210 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=sp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=sp) / bii ) / bii + do j = i + 1, i1 + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - & + conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1, i ),KIND=sp)*bb( i-j+& + 1, j )*conjg( bb( i-k+1,k ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_csscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_cgeru( n-m, kbt, -cone, x( m+1, i ), 1,bb( kbt+1, i-& + kbt ), ldbb-1,x( m+1, i-kbt ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_360: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i-k+ka+1,i) + call stdlib_clartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )& + , ra ) + ! create nonzero element a(i-k+ka+1,i-k) outside the + ! band and store it in work(i-k) + t = -bb( k+1, i-k )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k ) + + ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_clargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, & + rwork( j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2-m ), work( j2-m ), ka1 ) + call stdlib_clacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_crot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), work(& + j-m ) ) + end do + end if + end do loop_360 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1 + end if + end if + loop_400: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-& + ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_clargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), & + ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2 ), work( j2 ), ka1 ) + call stdlib_clacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_crot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), work( & + j ) ) + end do + end if + end do loop_440 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + end if + go to 10 + 480 continue + ! **************************** phase 2 ***************************** + ! the logical structure of this phase is: + ! update = .true. + ! do i = 1, m + ! use s(i) to update a and create a new bulge + ! apply rotations to push all bulges ka positions upward + ! end do + ! update = .false. + ! do i = m - ka - 1, 2, -1 + ! apply rotations to push all bulges ka positions upward + ! end do + ! to avoid duplicating code, the two loops are merged. + update = .true. + i = 0 + 490 continue + if( update ) then + i = i + 1 + kbt = min( kb, m-i ) + i0 = i + 1 + i1 = max( 1, i-ka ) + i2 = i + kbt - ka1 + if( i>m ) then + update = .false. + i = i - 1 + i0 = m + 1 + if( ka==0 )return + go to 490 + end if + else + i = i - ka + if( i<2 )return + end if + if( i0 )call stdlib_cgeru( nx, kbt, -cone, x( 1, i ), 1,bb( kb, i+1 ), & + ldbb-1, x( 1, i+1 ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+ka1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_610: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_clargv( nrt, ab( 1, j1+ka ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( j1 ),work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( j1 ), work( j1 ),ka1 ) + call stdlib_clacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_crot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), work( j ) ) + + end do + end if + end do loop_610 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1 + end if + end if + loop_650: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),& + inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j-1,j+ka) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( 1, j+ka-1 ) + ab( 1, j+ka-1 ) = rwork( m-kb+j )*ab( 1, j+ka-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_650 + loop_690: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_clargv( nr, ab( 1, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_clacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_crot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_690 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=sp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=sp) / bii ) / bii + do j = i1, i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do j = i + 1, min( n, i+ka ) + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do k = i + 1, i + kbt + do j = k, i + kbt + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - & + conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1, i ),KIND=sp)*bb( j-i+& + 1, i )*conjg( bb( k-i+1,i ) ) + end do + do j = i + kbt + 1, min( n, i+ka ) + ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + + end do + end do + do j = i1, i + do k = i + 1, min( j+ka, i+kbt ) + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_csscal( nx, one / bii, x( 1, i ), 1 ) + if( kbt>0 )call stdlib_cgerc( nx, kbt, -cone, x( 1, i ), 1, bb( 2, i ),1, x( & + 1, i+1 ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_840: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_clargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + j1 ), work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + j1 ),work( j1 ), ka1 ) + call stdlib_clacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_crot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), conjg( work(& + j ) ) ) + end do + end if + end do loop_840 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1 + end if + end if + loop_880: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, & + j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j+ka,j-1) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 ) + ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_880 + loop_920: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_clargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+& + j1 ), ka1 ) + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_clartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + m-kb+j1 ), work( m-kb+j1 ),ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_clar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_clacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_crot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), conjg( & + work( m-kb+j ) ) ) + end do + end if + end do loop_920 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + end if + go to 490 + end subroutine stdlib_chbgst + + !> CHBTRD: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_chbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldq, n + ! Array Arguments + real(sp), intent(out) :: d(*), e(*) + complex(sp), intent(inout) :: ab(ldab,*), q(ldq,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: initq, upper, wantq + integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, & + jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt + real(sp) :: abst + complex(sp) :: t, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,real + ! Executable Statements + ! test the input parameters + initq = stdlib_lsame( vect, 'V' ) + wantq = initq .or. stdlib_lsame( vect, 'U' ) + upper = stdlib_lsame( uplo, 'U' ) + kd1 = kd + 1 + kdm1 = kd - 1 + incx = ldab - 1 + iqend = 1 + info = 0 + if( .not.wantq .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldab1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the upper triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( kd1, 1 ) = real( ab( kd1, 1 ),KIND=sp) + loop_90: do i = 1, n - 2 + ! reduce i-th row of matrix to tridiagonal form + loop_80: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_clargv( nr, ab( 1, j1-1 ), inca, work( j1 ),kd1, d( j1 ), & + kd1 ) + ! apply rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_clartv or stdlib_crot is used + if( nr>=2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_clartv( nr, ab( l+1, j1-1 ), inca,ab( l, j1 ), inca, & + d( j1 ),work( j1 ), kd1 ) + end do + else + jend = j1 + ( nr-1 )*kd1 + do jinc = j1, jend, kd1 + call stdlib_crot( kdm1, ab( 2, jinc-1 ), 1,ab( 1, jinc ), 1, d( & + jinc ),work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+k-1) + ! within the band + call stdlib_clartg( ab( kd-k+3, i+k-2 ),ab( kd-k+2, i+k-1 ), d( i+k-& + 1 ),work( i+k-1 ), temp ) + ab( kd-k+3, i+k-2 ) = temp + ! apply rotation from the right + call stdlib_crot( k-3, ab( kd-k+4, i+k-2 ), 1,ab( kd-k+3, i+k-1 ), 1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_clar2v( nr, ab( kd1, j1-1 ), ab( kd1, j1 ),ab( kd, & + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the left + if( nr>0 ) then + call stdlib_clacgv( nr, work( j1 ), kd1 ) + if( 2*kd-1n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_clartv( nrt, ab( kd-l, j1+l ), inca,ab( kd-& + l+1, j1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do jin = j1, j1end, kd1 + call stdlib_crot( kd-1, ab( kd-1, jin+1 ), incx,ab( kd, jin+1 )& + , incx,d( jin ), work( jin ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_crot( lend, ab( kd-1, last+1 ), incx,ab( kd, & + last+1 ), incx, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_crot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + conjg( work( j ) ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_crot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), conjg( & + work( j ) ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j-1,j+kd) outside the band + ! and store it in work + work( j+kd ) = work( j )*ab( 1, j+kd ) + ab( 1, j+kd ) = d( j )*ab( 1, j+kd ) + end do + end do loop_80 + end do loop_90 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( kd, i+1 ) + abst = abs( t ) + ab( kd, i+1 ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the lower triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( 1, 1 ) = real( ab( 1, 1 ),KIND=sp) + loop_210: do i = 1, n - 2 + ! reduce i-th column of matrix to tridiagonal form + loop_200: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_clargv( nr, ab( kd1, j1-kd1 ), inca,work( j1 ), kd1, d( j1 )& + , kd1 ) + ! apply plane rotations from one side + ! dependent on the the number of diagonals either + ! stdlib_clartv or stdlib_crot is used + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_clartv( nr, ab( kd1-l, j1-kd1+l ), inca,ab( kd1-l+1, & + j1-kd1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + jend = j1 + kd1*( nr-1 ) + do jinc = j1, jend, kd1 + call stdlib_crot( kdm1, ab( kd, jinc-kd ), incx,ab( kd1, jinc-kd )& + , incx,d( jinc ), work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i+k-1,i) + ! within the band + call stdlib_clartg( ab( k-1, i ), ab( k, i ),d( i+k-1 ), work( i+k-1 & + ), temp ) + ab( k-1, i ) = temp + ! apply rotation from the left + call stdlib_crot( k-3, ab( k-2, i+1 ), ldab-1,ab( k-1, i+1 ), ldab-1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_clar2v( nr, ab( 1, j1-1 ), ab( 1, j1 ),ab( 2, j1-1 ),& + inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_clartv or stdlib_crot is used + if( nr>0 ) then + call stdlib_clacgv( nr, work( j1 ), kd1 ) + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + if( j2+l>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_clartv( nrt, ab( l+2, j1-1 ), inca,ab( l+1,& + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do j1inc = j1, j1end, kd1 + call stdlib_crot( kdm1, ab( 3, j1inc-1 ), 1,ab( 2, j1inc ), 1, & + d( j1inc ),work( j1inc ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_crot( lend, ab( 3, last-1 ), 1,ab( 2, last ),& + 1, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_crot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_crot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j+kd,j-1) outside the + ! band and store it in work + work( j+kd ) = work( j )*ab( kd1, j ) + ab( kd1, j ) = d( j )*ab( kd1, j ) + end do + end do loop_200 + end do loop_210 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( 2, i ) + abst = abs( t ) + ab( 2, i ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i CHECON: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_checon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_chetrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_checon + + !> CHECON_ROOK: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_checon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_chetrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_checon_rook + + !> CHEEV: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. + + subroutine stdlib_cheev( jobz, uplo, n, a, lda, w, work, lwork, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_clascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_chetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_chetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_cungtr to generate the unitary matrix, then call stdlib_csteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + call stdlib_cungtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + indwrk = inde + n + call stdlib_csteqr( jobz, n, w, rwork( inde ), a, lda,rwork( indwrk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_cheev + + !> CHEEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> CHEEVR first reduces the matrix A to tridiagonal form T with a call + !> to CHETRD. Then, whenever possible, CHEEVR calls CSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. CSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see CSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : CHEEVR calls CSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> CHEEVR calls SSTEBZ and CSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of CSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_cheevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,rwork, lrwork, iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lrwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indibl, indifl, indisp, indiwo, indrd, indrdd, & + indre, indree, indrwk, indtau, indwk, indwkn, iscale, itmp1, j, jj, liwmin, llwork, & + llrwork, llwrkn, lrwmin, lwkopt, lwmin, nb, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'CHEEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( lrwork==-1 ) .or.( liwork==-1 ) ) + lrwmin = max( 1, 24*n ) + liwmin = max( 1, 10*n ) + lwmin = max( 1, 2*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=sp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=sp) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_clansy( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_csscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_csscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_ssterf or stdlib_cstemr fail. + ! work(indtau:indtau+n-1) stores the complex scalar factors of the + ! elementary reflectors used in stdlib_chetrd. + indtau = 1 + ! indwk is the starting offset of the remaining complex workspace, + ! and llwork is the remaining complex workspace size. + indwk = indtau + n + llwork = lwork - indwk + 1 + ! rwork(indrd:indrd+n-1) stores the real tridiagonal's diagonal + ! entries. + indrd = 1 + ! rwork(indre:indre+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_chetrd. + indre = indrd + n + ! rwork(indrdd:indrdd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_cstemr (the stdlib_ssterf path copies the diagonal to w). + indrdd = indre + n + ! rwork(indree:indree+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_ssterf and stdlib_cstemr. + indree = indrdd + n + ! indrwk is the starting offset of the left-over real workspace, and + ! llrwork is the remaining workspace size. + indrwk = indree + n + llrwork = lrwork - indrwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_sstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_sstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_sstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_chetrd to reduce hermitian matrix to tridiagonal form. + call stdlib_chetrd( uplo, n, a, lda, rwork( indrd ), rwork( indre ),work( indtau ), & + work( indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_ssterf or stdlib_cstemr and stdlib_cunmtr. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig.or.test ) .and. ( ieeeok==1 ) ) then + if( .not.wantz ) then + call stdlib_scopy( n, rwork( indrd ), 1, w, 1 ) + call stdlib_scopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_ssterf( n, w, rwork( indree ), info ) + else + call stdlib_scopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_scopy( n, rwork( indrd ), 1, rwork( indrdd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_cstemr( jobz, 'A', n, rwork( indrdd ),rwork( indree ), vl, vu, il, & + iu, m, w,z, ldz, n, isuppz, tryrac,rwork( indrwk ), llrwork,iwork, liwork, info ) + + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_cstemr. + if( wantz .and. info==0 ) then + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_cunmtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_cstein. + ! also call stdlib_sstebz and stdlib_cstein if stdlib_cstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indrd ), rwork( & + indre ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwo )& + , info ) + if( wantz ) then + call stdlib_cstein( n, rwork( indrd ), rwork( indre ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_cstein. + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_cunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) CHEEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_cheevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, llwork, lwkmin, lwkopt, nb, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=sp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=sp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_clanhe( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_csscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_csscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_chetrd to reduce hermitian matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_chetrd( uplo, n, a, lda, rwork( indd ), rwork( inde ),work( indtau ), work(& + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_ssterf or stdlib_cungtr and stdlib_csteqr. if this fails for + ! some eigenvalue, then try stdlib_sstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_scopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_ssterf( n, w, rwork( indee ), info ) + else + call stdlib_clacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_cungtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_csteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_cstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_cstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_cstein. + call stdlib_cunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwrk ), llwork, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) CHEGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian and B is also + !> positive definite. + + subroutine stdlib_chegv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork== -1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_ctrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h*y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_ctrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_chegv + + !> CHEGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_chegvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_ctrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h*y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_ctrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_chegvx + + !> CHERFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_cherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_chetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_chetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_chetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_cherfs + + !> CHESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_chesv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CHESV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> CHETRF_RK is called to compute the factorization of a complex + !> Hermitian matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine CHETRS_3. + + pure subroutine stdlib_chesv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CHESV_ROOK: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used + !> to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> CHETRF_ROOK is called to compute the factorization of a complex + !> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling CHETRS_ROOK (uses BLAS 2). + + pure subroutine stdlib_chesv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CHESVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_chesvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: a(lda,*), b(ldb,*) + complex(sp), intent(inout) :: af(ldaf,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanhe( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_checon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_chetrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_cherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CHGEQZ: computes the eigenvalues of a complex matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the single-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a complex matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by CGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices and S and P are upper triangular. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from CGGHRD that reduced + !> the matrix pair (A,B) to generalized Hessenberg form, then the output + !> matrices Q1*Q and Z1*Z are the unitary factors from the generalized + !> Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) + !> (equivalently, of (A,B)) are computed as a pair of complex values + !> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an + !> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> The values of alpha and beta for the i-th eigenvalue can be read + !> directly from the generalized Schur form: alpha = S(i,i), + !> beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_chgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alpha, beta, q, ldq,& + z, ldz, work, lwork,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: alpha(*), beta(*), work(*) + complex(sp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + + + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(sp) :: absb, anorm, ascale, atol, bnorm, bscale, btol, c, safmin, temp, temp2, & + tempr, ulp + complex(sp) :: abi22, ad11, ad12, ad21, ad22, ctemp, ctemp2, ctemp3, eshift, s, shift, & + signbc, u12, x, abi12, y + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,min,real,sqrt + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ilschr = .true. + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + ilq = .true. + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + ilz = .true. + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihisafmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_cscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_cscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_cscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_cscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! if ihi < ilo, skip qz steps + if( ihimaxit )go to 180 + ! split the matrix if possible. + ! two tests: + ! 1: h(j,j-1)=0 or j=ilo + ! 2: t(j,j)=0 + ! special case: j=ilast + if( ilast==ilo ) then + go to 60 + else + if( abs1( h( ilast, ilast-1 ) )<=max( safmin, ulp*(abs1( h( ilast, ilast ) ) + & + abs1( h( ilast-1, ilast-1 )) ) ) ) then + h( ilast, ilast-1 ) = czero + go to 60 + end if + end if + if( abs( t( ilast, ilast ) )<=max( safmin, ulp*(abs( t( ilast - 1, ilast ) ) + abs( & + t( ilast-1, ilast-1 )) ) ) ) then + t( ilast, ilast ) = czero + go to 50 + end if + ! general case: j ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 60 + else + ifirst = jch + 1 + go to 70 + end if + end if + t( jch+1, jch+1 ) = czero + end do + go to 50 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + ctemp = t( jch, jch+1 ) + call stdlib_clartg( ctemp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = czero + if( jchsafmin ) then + signbc = conjg( t( ilast, ilast ) / absb ) + t( ilast, ilast ) = absb + if( ilschr ) then + call stdlib_cscal( ilast-ifrstm, signbc, t( ifrstm, ilast ), 1 ) + call stdlib_cscal( ilast+1-ifrstm, signbc, h( ifrstm, ilast ),1 ) + else + call stdlib_cscal( 1, signbc, h( ilast, ilast ), 1 ) + end if + if( ilz )call stdlib_cscal( n, signbc, z( 1, ilast ), 1 ) + else + t( ilast, ilast ) = czero + end if + alpha( ilast ) = h( ilast, ilast ) + beta( ilast ) = t( ilast, ilast ) + ! go to next block -- exit if finished. + ilast = ilast - 1 + if( ilastilast )ifrstm = ilo + end if + go to 160 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 70 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute the shift. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10/=iiter ) then + ! the wilkinson shift (aep p.512_sp), i.e., the eigenvalue of + ! the bottom-right 2x2 block of a inv(b) which is nearest to + ! the bottom-right element. + ! we factor b as u*d, where u has unit diagonals, and + ! compute (a*inv(d))*inv(u). + u12 = ( bscale*t( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + abi22 = ad22 - u12*ad21 + abi12 = ad12 - u12*ad11 + shift = abi22 + ctemp = sqrt( abi12 )*sqrt( ad21 ) + temp = abs1( ctemp ) + if( ctemp/=zero ) then + x = half*( ad11-shift ) + temp2 = abs1( x ) + temp = max( temp, abs1( x ) ) + y = temp*sqrt( ( x / temp )**2+( ctemp / temp )**2 ) + if( temp2>zero ) then + if( real( x / temp2,KIND=sp)*real( y,KIND=sp)+aimag( x / temp2 )*aimag( y )& + safmin ) & + then + eshift = eshift + ( ascale*h( ilast,ilast ) )/( bscale*t( ilast, ilast ) ) + + else + eshift = eshift + ( ascale*h( ilast,ilast-1 ) )/( bscale*t( ilast-1, ilast-1 )& + ) + end if + shift = eshift + end if + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + ctemp = ascale*h( j, j ) - shift*( bscale*t( j, j ) ) + temp = abs1( ctemp ) + temp2 = ascale*abs1( h( j+1, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + ctemp = h( j, j-1 ) + call stdlib_clartg( ctemp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = czero + end if + do jc = j, ilastm + ctemp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -conjg( s )*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = ctemp + ctemp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -conjg( s )*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = ctemp2 + end do + if( ilq ) then + do jr = 1, n + ctemp = c*q( jr, j ) + conjg( s )*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = ctemp + end do + end if + ctemp = t( j+1, j+1 ) + call stdlib_clartg( ctemp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = czero + do jr = ifrstm, min( j+2, ilast ) + ctemp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -conjg( s )*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = ctemp + end do + do jr = ifrstm, j + ctemp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -conjg( s )*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = ctemp + end do + if( ilz ) then + do jr = 1, n + ctemp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -conjg( s )*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = ctemp + end do + end if + end do loop_150 + 160 continue + end do loop_170 + ! drop-through = non-convergence + 180 continue + info = ilast + go to 210 + ! successful completion of all qz steps + 190 continue + ! set eigenvalues 1:ilo-1 + do j = 1, ilo - 1 + absb = abs( t( j, j ) ) + if( absb>safmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_cscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_cscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_cscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_cscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! normal termination + info = 0 + ! exit (other than argument error) -- return optimal workspace size + 210 continue + work( 1 ) = cmplx( n,KIND=sp) + return + end subroutine stdlib_chgeqz + + !> CHPCON: estimates the reciprocal of the condition number of a complex + !> Hermitian packed matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_chpcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(sp), intent(in) :: ap(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_clacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_chptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_chpcon + + !> CHPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix in packed storage. + + subroutine stdlib_chpev( jobz, uplo, n, ap, w, z, ldz, work, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_csscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_chptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + call stdlib_chptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_cupgtr to generate the orthogonal matrix, then call stdlib_csteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + indwrk = indtau + n + call stdlib_cupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + indrwk = inde + n + call stdlib_csteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_chpev + + !> CHPEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A in packed storage. + !> Eigenvalues/vectors can be selected by specifying either a range of + !> values or a range of indices for the desired eigenvalues. + + subroutine stdlib_chpevx( jobz, range, uplo, n, ap, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, rwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( ap( 1 ),KIND=sp) ) then + m = 1 + w( 1 ) = real( ap( 1 ),KIND=sp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if ( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + endif + anrm = stdlib_clanhp( 'M', uplo, n, ap, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_csscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_chptrd to reduce hermitian packed matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + call stdlib_chptrd( uplo, n, ap, rwork( indd ), rwork( inde ),work( indtau ), iinfo ) + + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_ssterf or stdlib_cupgtr and stdlib_csteqr. if this fails + ! for some eigenvalue, then try stdlib_sstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_scopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_ssterf( n, w, rwork( indee ), info ) + else + call stdlib_cupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_csteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_cstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_cstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_cstein. + indwrk = indtau + n + call stdlib_cupmtr( 'L', uplo, 'N', n, m, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) CHPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian, stored in packed format, + !> and B is also positive definite. + + subroutine stdlib_chpgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*), bp(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: trans + integer(ilp) :: j, neig + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_ctpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h*y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_ctpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_chpgv + + !> CHPGVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. Eigenvalues and eigenvectors can be selected by + !> specifying either a range of values or a range of indices for the + !> desired eigenvalues. + + subroutine stdlib_chpgvx( itype, jobz, range, uplo, n, ap, bp, vl, vu,il, iu, abstol, m, w, & + z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*), bp(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: alleig, indeig, upper, valeig, wantz + character :: trans + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else + if( valeig ) then + if( n>0 .and. vu<=vl ) then + info = -9 + end if + else if( indeig ) then + if( il<1 ) then + info = -10 + else if( iun ) then + info = -11 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, m + call stdlib_ctpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h*y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, m + call stdlib_ctpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_chpgvx + + !> CHPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_chprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(sp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_chptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_caxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_clacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_clacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_chptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_chptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_chprfs + + !> CHPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is Hermitian and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_chpsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb CHPSVX: uses the diagonal pivoting factorization A = U*D*U**H or + !> A = L*D*L**H to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_chpsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(sp), intent(inout) :: afp(*) + complex(sp), intent(in) :: ap(*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_chpcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_chptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_chprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a complex upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_chsein( side, eigsrc, initv, select, n, h, ldh, w, vl,ldvl, vr, ldvr, mm, & + m, work, rwork, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(in) :: h(ldh,*) + complex(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), w(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: rzero = 0.0e+0_sp + + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ks, ldwork + real(sp) :: eps3, hnorm, smlnum, ulp, unfl + complex(sp) :: cdum, wk + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==czero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_clanhs( 'I', kr-kl+1, h( kl, kl ), ldh, rwork ) + if( stdlib_sisnan( hnorm ) ) then + info = -6 + return + else if( (hnorm>rzero) ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wk = w( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. cabs1( w( i )-wk )0 ) then + info = info + 1 + ifaill( ks ) = k + else + ifaill( ks ) = 0 + end if + do i = 1, kl - 1 + vl( i, ks ) = czero + end do + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_claein( .true., noinit, kr, h, ldh, wk, vr( 1, ks ),work, ldwork, & + rwork, eps3, smlnum, iinfo ) + if( iinfo>0 ) then + info = info + 1 + ifailr( ks ) = k + else + ifailr( ks ) = 0 + end if + do i = kr + 1, n + vr( i, ks ) = czero + end do + end if + ks = ks + 1 + end if + end do loop_100 + return + end subroutine stdlib_chsein + + !> Using the divide and conquer method, CLAED0: computes all eigenvalues + !> of a symmetric tridiagonal matrix which is one diagonal block of + !> those from reducing a dense or band Hermitian matrix and + !> corresponding eigenvectors of the dense or band matrix. + + pure subroutine stdlib_claed0( qsiz, n, d, e, q, ldq, qstore, ldqs, rwork,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldqs, n, qsiz + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: q(ldq,*) + complex(sp), intent(out) :: qstore(ldqs,*) + ! ===================================================================== + ! warning: n could be as big as qsiz! + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, ll, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, & + subpbs, tlvls + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: abs,int,log,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq < 0 .or. icompq > 2 ) then + ! info = -1 + ! else if( ( icompq == 1 ) .and. ( qsiz < max( 0, n ) ) ) + ! $ then + if( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=sp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + k = 1 + do j = submat, iwork( i+1 ) + iwork( indxq+j ) = k + k = k + 1 + end do + end do + ! successively merge eigensystems of adjacent submatrices + ! into eigensystem for the corresponding larger matrix. + ! while ( subpbs > 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. stdlib_claed7 handles the case + ! when the eigenvectors of a full or band hermitian matrix (which + ! was reduced to tridiagonal form) are desired. + ! i am free to use q as a valuable working space until loop 150. + call stdlib_claed7( matsiz, msd2, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,e( submat+msd2-1 ), iwork( indxq+submat ),rwork( iq ), & + iwork( iqptr ), iwork( iprmpt ),iwork( iperm ), iwork( igivpt ),iwork( igivcl ), & + rwork( igivnm ),q( 1, submat ), rwork( iwrem ),iwork( subpbs+1 ), info ) + if( info>0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + iwork( i / 2+1 ) = iwork( i+2 ) + end do + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + do i = 1, n + j = iwork( indxq+i ) + rwork( i ) = d( j ) + call stdlib_ccopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_scopy( n, rwork, 1, d, 1 ) + return + end subroutine stdlib_claed0 + + !> CLAMSWLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (CLASWLQ) + + pure subroutine stdlib_clamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), t(ldt,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_cgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_ctpmlqt('L','C',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_ctpmlqt('L','C',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_cgemlqt('L','C',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii = m-kk+1 + ctr = 1 + call stdlib_cgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_ctpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1, ctr *k+1), ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_ctpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1, ctr*k+1), ldt, c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_ctpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_ctpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1,ctr*k+1), ldt,& + c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_cgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_cgemlqt('R','C',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_ctpmlqt('R','C',m , nb-k, k, 0,mb, a(1,i), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_ctpmlqt('R','C',m , kk, k, 0,mb, a(1,ii), lda,t(1,ctr*k+1),ldt, c(1,1),& + ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_clamswlq + + !> CLAMTSQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (CLATSQR) + + pure subroutine stdlib_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), t(ldt,*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = m * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_cgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_ctpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1, ctr*k+1),ldt , c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_ctpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_cgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_cgemqrt('L','C',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_ctpmqrt('L','C',mb-k , n, k, 0,nb, a(i,1), lda,t(1, ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_ctpmqrt('L','C',kk , n, k, 0,nb, a(ii,1), lda,t(1,ctr*k+1), ldt, c(1,1)& + , ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_ctpmqrt('R','C',m , kk, k, 0, nb, a(ii,1), lda,t(1, ctr*k+1), ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_ctpmqrt('R','C',m , mb-k, k, 0,nb, a(i,1), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_cgemqrt('R','C',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_cgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_ctpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1,ctr*k+1),ldt, c(1,1)& + , ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_ctpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1,ctr*k+1),ldt, c(1,1)& + , ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_clamtsqr + + !> CLAQR2: is identical to CLAQR3 except that it avoids + !> recursion by calling CLAHQR instead of CLAQR4. + !> Aggressive early deflation: + !> This subroutine accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(sp), parameter :: rzero = 0.0_sp + real(sp), parameter :: rone = 1.0_sp + + + ! Local Scalars + complex(sp) :: beta, cdum, s, tau + real(sp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwkopt + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,int,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_cgehrd ==== + call stdlib_cgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_cunmhr ==== + call stdlib_cunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_claset( 'A', jw, jw, czero, cone, v, ldv ) + call stdlib_clahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_ctrexc can not fail in this case.) ==== + ifst = ns + call stdlib_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_ccopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_clarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_claset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_clarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_clarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_clarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_cgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_ccopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_cunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + end subroutine stdlib_claqr2 + + !> CLASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a complex M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + + pure subroutine stdlib_claswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, lwork, ldt + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. nm .and. m>0 )) then + info = -3 + else if( nb<=0 ) then + info = -4 + else if( lda=n).or.(nb<=m).or.(nb>=n)) then + call stdlib_cgelqt( m, n, mb, a, lda, t, ldt, work, info) + return + end if + kk = mod((n-m),(nb-m)) + ii=n-kk+1 + ! compute the lq factorization of the first block a(1:m,1:nb) + call stdlib_cgelqt( m, nb, mb, a(1,1), lda, t, ldt, work, info) + ctr = 1 + do i = nb+1, ii-nb+m , (nb-m) + ! compute the qr factorization of the current block a(1:m,i:i+nb-m) + call stdlib_ctplqt( m, nb-m, 0, mb, a(1,1), lda, a( 1, i ),lda, t(1,ctr*m+1),ldt, & + work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(1:m,ii:n) + if (ii<=n) then + call stdlib_ctplqt( m, kk, 0, mb, a(1,1), lda, a( 1, ii ),lda, t(1,ctr*m+1), ldt,& + work, info ) + end if + work( 1 ) = m * mb + return + end subroutine stdlib_claswlq + + !> CLATSQR: computes a blocked Tall-Skinny QR factorization of + !> a complex M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + + pure subroutine stdlib_clatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, ldt, lwork + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. mn .and. n>0 )) then + info = -4 + else if( lda=m)) then + call stdlib_cgeqrt( m, n, nb, a, lda, t, ldt, work, info) + return + end if + kk = mod((m-n),(mb-n)) + ii=m-kk+1 + ! compute the qr factorization of the first block a(1:mb,1:n) + call stdlib_cgeqrt( mb, n, nb, a(1,1), lda, t, ldt, work, info ) + ctr = 1 + do i = mb+1, ii-mb+n , (mb-n) + ! compute the qr factorization of the current block a(i:i+mb-n,1:n) + call stdlib_ctpqrt( mb-n, n, 0, nb, a(1,1), lda, a( i, 1 ), lda,t(1,ctr * n + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(ii:m,1:n) + if (ii<=m) then + call stdlib_ctpqrt( kk, n, 0, nb, a(1,1), lda, a( ii, 1 ), lda,t(1, ctr * n + 1), & + ldt,work, info ) + end if + work( 1 ) = n*nb + return + end subroutine stdlib_clatsqr + + !> CPBSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_cpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab CPBSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cpbsvx( fact, uplo, n, kd, nrhs, ab, ldab, afb, ldafb,equed, s, b, ldb, x, & + ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(inout) :: s(*) + complex(sp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ, upper + integer(ilp) :: i, infequ, j, j1, j2 + real(sp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + upper = stdlib_lsame( uplo, 'U' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanhb( '1', uplo, n, kd, ab, ldab, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_cpbcon( uplo, n, kd, afb, ldafb, anorm, rcond, work, rwork,info ) + ! compute the solution matrix x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cpbtrs( uplo, n, kd, nrhs, afb, ldafb, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_cpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CPFTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_cpftrf( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(sp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CPFTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_cpotrf( 'L', n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_ctrsm( 'R', 'L', 'C', 'N', n2, n1, cone, a( 0 ), n,a( n1 ), n ) + + call stdlib_cherk( 'U', 'N', n2, n1, -one, a( n1 ), n, one,a( n ), n ) + call stdlib_cpotrf( 'U', n2, a( n ), n, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_cpotrf( 'L', n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_ctrsm( 'L', 'L', 'N', 'N', n1, n2, cone, a( n2 ), n,a( 0 ), n ) + + call stdlib_cherk( 'U', 'C', n2, n1, -one, a( 0 ), n, one,a( n1 ), n ) + call stdlib_cpotrf( 'U', n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + call stdlib_cpotrf( 'U', n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_ctrsm( 'L', 'U', 'C', 'N', n1, n2, cone, a( 0 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_cherk( 'L', 'C', n2, n1, -one, a( n1*n1 ), n1, one,a( 1 ), n1 ) + + call stdlib_cpotrf( 'L', n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + call stdlib_cpotrf( 'U', n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_ctrsm( 'R', 'U', 'N', 'N', n2, n1, cone, a( n2*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_cherk( 'L', 'N', n2, n1, -one, a( 0 ), n2, one,a( n1*n2 ), n2 ) + + call stdlib_cpotrf( 'L', n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_cpotrf( 'L', k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_ctrsm( 'R', 'L', 'C', 'N', k, k, cone, a( 1 ), n+1,a( k+1 ), n+1 ) + + call stdlib_cherk( 'U', 'N', k, k, -one, a( k+1 ), n+1, one,a( 0 ), n+1 ) + + call stdlib_cpotrf( 'U', k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_cpotrf( 'L', k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_ctrsm( 'L', 'L', 'N', 'N', k, k, cone, a( k+1 ),n+1, a( 0 ), n+1 ) + + call stdlib_cherk( 'U', 'C', k, k, -one, a( 0 ), n+1, one,a( k ), n+1 ) + + call stdlib_cpotrf( 'U', k, a( k ), n+1, info ) + if( info>0 )info = info + k + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_cpotrf( 'U', k, a( 0+k ), k, info ) + if( info>0 )return + call stdlib_ctrsm( 'L', 'U', 'C', 'N', k, k, cone, a( k ), n1,a( k*( k+1 ) ), & + k ) + call stdlib_cherk( 'L', 'C', k, k, -one, a( k*( k+1 ) ), k, one,a( 0 ), k ) + + call stdlib_cpotrf( 'L', k, a( 0 ), k, info ) + if( info>0 )info = info + k + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_cpotrf( 'U', k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_ctrsm( 'R', 'U', 'N', 'N', k, k, cone,a( k*( k+1 ) ), k, a( 0 ), & + k ) + call stdlib_cherk( 'L', 'N', k, k, -one, a( 0 ), k, one,a( k*k ), k ) + call stdlib_cpotrf( 'L', k, a( k*k ), k, info ) + if( info>0 )info = info + k + end if + end if + end if + return + end subroutine stdlib_cpftrf + + !> CPFTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by CPFTRF. + + pure subroutine stdlib_cpftri( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(sp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CPFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_ctftri( transr, uplo, 'N', n, a, info ) + if( info>0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution of triangular matrix multiply: inv(u)*inv(u)^c or + ! inv(l)^c*inv(l). there are eight cases. + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_clauum( 'L', n1, a( 0 ), n, info ) + call stdlib_cherk( 'L', 'C', n1, n2, one, a( n1 ), n, one,a( 0 ), n ) + call stdlib_ctrmm( 'L', 'U', 'N', 'N', n2, n1, cone, a( n ), n,a( n1 ), n ) + + call stdlib_clauum( 'U', n2, a( n ), n, info ) + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_clauum( 'L', n1, a( n2 ), n, info ) + call stdlib_cherk( 'L', 'N', n1, n2, one, a( 0 ), n, one,a( n2 ), n ) + call stdlib_ctrmm( 'R', 'U', 'C', 'N', n1, n2, cone, a( n1 ), n,a( 0 ), n ) + + call stdlib_clauum( 'U', n2, a( n1 ), n, info ) + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_clauum( 'U', n1, a( 0 ), n1, info ) + call stdlib_cherk( 'U', 'N', n1, n2, one, a( n1*n1 ), n1, one,a( 0 ), n1 ) + + call stdlib_ctrmm( 'R', 'L', 'N', 'N', n1, n2, cone, a( 1 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_clauum( 'L', n2, a( 1 ), n1, info ) + else + ! srpa for upper, transpose, and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_clauum( 'U', n1, a( n2*n2 ), n2, info ) + call stdlib_cherk( 'U', 'C', n1, n2, one, a( 0 ), n2, one,a( n2*n2 ), n2 ) + + call stdlib_ctrmm( 'L', 'L', 'C', 'N', n2, n1, cone, a( n1*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_clauum( 'L', n2, a( n1*n2 ), n2, info ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_clauum( 'L', k, a( 1 ), n+1, info ) + call stdlib_cherk( 'L', 'C', k, k, one, a( k+1 ), n+1, one,a( 1 ), n+1 ) + + call stdlib_ctrmm( 'L', 'U', 'N', 'N', k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 ) + + call stdlib_clauum( 'U', k, a( 0 ), n+1, info ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_clauum( 'L', k, a( k+1 ), n+1, info ) + call stdlib_cherk( 'L', 'N', k, k, one, a( 0 ), n+1, one,a( k+1 ), n+1 ) + + call stdlib_ctrmm( 'R', 'U', 'C', 'N', k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + call stdlib_clauum( 'U', k, a( k ), n+1, info ) + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1), + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_clauum( 'U', k, a( k ), k, info ) + call stdlib_cherk( 'U', 'N', k, k, one, a( k*( k+1 ) ), k, one,a( k ), k ) + + call stdlib_ctrmm( 'R', 'L', 'N', 'N', k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + call stdlib_clauum( 'L', k, a( 0 ), k, info ) + else + ! srpa for upper, transpose, and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0), + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_clauum( 'U', k, a( k*( k+1 ) ), k, info ) + call stdlib_cherk( 'U', 'C', k, k, one, a( 0 ), k, one,a( k*( k+1 ) ), k ) + + call stdlib_ctrmm( 'L', 'L', 'C', 'N', k, k, cone, a( k*k ), k,a( 0 ), k ) + + call stdlib_clauum( 'L', k, a( k*k ), k, info ) + end if + end if + end if + return + end subroutine stdlib_cpftri + + !> CPOSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H* U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_cposv( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CPOSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cposvx( fact, uplo, n, nrhs, a, lda, af, ldaf, equed,s, b, ldb, x, ldx, & + rcond, ferr, berr, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(inout) :: s(*) + complex(sp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(sp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanhe( '1', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_cpocon( uplo, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cpotrs( uplo, n, nrhs, af, ldaf, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_cporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CPTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + + pure subroutine stdlib_cptrfs( uplo, n, nrhs, d, e, df, ef, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(in) :: d(*), df(*) + complex(sp), intent(in) :: b(ldb,*), e(*), ef(*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ix, j, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin + complex(sp) :: bi, cx, dx, ex, zdum + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,max,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=sp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_cpttrs( uplo, n, 1, df, ef, work, n, info ) + call stdlib_caxpy( n, cmplx( one,KIND=sp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + ix = stdlib_isamax( n, rwork, 1 ) + ferr( j ) = rwork( ix ) + ! estimate the norm of inv(a). + ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by + ! m(i,j) = abs(a(i,j)), i = j, + ! m(i,j) = -abs(a(i,j)), i .ne. j, + ! and e = [ 1, 1, ..., 1 ]**t. note m(a) = m(l)*d*m(l)**h. + ! solve m(l) * x = e. + rwork( 1 ) = one + do i = 2, n + rwork( i ) = one + rwork( i-1 )*abs( ef( i-1 ) ) + end do + ! solve d * m(l)**h * x = b. + rwork( n ) = rwork( n ) / df( n ) + do i = n - 1, 1, -1 + rwork( i ) = rwork( i ) / df( i ) +rwork( i+1 )*abs( ef( i ) ) + end do + ! compute norm(inv(a)) = max(x(i)), 1<=i<=n. + ix = stdlib_isamax( n, rwork, 1 ) + ferr( j ) = ferr( j )*abs( rwork( ix ) ) + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_100 + return + end subroutine stdlib_cptrfs + + !> CPTSV: computes the solution to a complex system of linear equations + !> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**H, and the factored form of A is then + !> used to solve the system of equations. + + pure subroutine stdlib_cptsv( n, nrhs, d, e, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: d(*) + complex(sp), intent(inout) :: b(ldb,*), e(*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb CPTSVX: uses the factorization A = L*D*L**H to compute the solution + !> to a complex system of linear equations A*X = B, where A is an + !> N-by-N Hermitian positive definite tridiagonal matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_cptsvx( fact, n, nrhs, d, e, df, ef, b, ldb, x, ldx,rcond, ferr, berr,& + work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(in) :: d(*) + real(sp), intent(inout) :: df(*) + complex(sp), intent(in) :: b(ldb,*), e(*) + complex(sp), intent(inout) :: ef(*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb1 )call stdlib_ccopy( n-1, e, 1, ef, 1 ) + call stdlib_cpttrf( n, df, ef, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_clanht( '1', n, d, e ) + ! compute the reciprocal of the condition number of a. + call stdlib_cptcon( n, df, ef, anorm, rcond, rwork, info ) + ! compute the solution vectors x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cpttrs( 'LOWER', n, nrhs, df, ef, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_cptrfs( 'LOWER', n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CSTEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See SLAED3 for details. + + pure subroutine stdlib_cstedc( compz, n, d, e, z, ldz, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(out) :: work(*) + complex(sp), intent(inout) :: z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: finish, i, icompz, ii, j, k, lgn, liwmin, ll, lrwmin, lwmin, m, smlsiz,& + start + real(sp) :: eps, orgnrm, p, tiny + ! Intrinsic Functions + intrinsic :: abs,int,log,max,mod,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or.( icompz>0 .and. ldztiny ) then + finish = finish + 1 + go to 40 + end if + end if + ! (sub) problem determined. compute its size and solve it. + m = finish - start + 1 + if( m>smlsiz ) then + ! scale. + orgnrm = stdlib_slanst( 'M', m, d( start ), e( start ) ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, m, 1, d( start ), m,info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, m-1, 1, e( start ),m-1, info ) + + call stdlib_claed0( n, m, d( start ), e( start ), z( 1, start ),ldz, work, n, & + rwork, iwork, info ) + if( info>0 ) then + info = ( info / ( m+1 )+start-1 )*( n+1 ) +mod( info, ( m+1 ) ) + start - & + 1 + go to 70 + end if + ! scale back. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, m, 1, d( start ), m,info ) + else + call stdlib_ssteqr( 'I', m, d( start ), e( start ), rwork, m,rwork( m*m+1 ), & + info ) + call stdlib_clacrm( n, m, z( 1, start ), ldz, rwork, m, work, n,rwork( m*m+1 )& + ) + call stdlib_clacpy( 'A', n, m, work, n, z( 1, start ), ldz ) + if( info>0 ) then + info = start*( n+1 ) + finish + go to 70 + end if + end if + start = finish + 1 + go to 30 + end if + ! endwhile + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

CSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> CSTEGR is a compatibility wrapper around the improved CSTEMR routine. + !> See SSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : CSTEGR and CSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_cstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*) + complex(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_cstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_cstegr + + !> CTGSEN: reorders the generalized Schur decomposition of a complex + !> matrix pair (A, B) (in terms of an unitary equivalence trans- + !> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the pair (A,B). The leading + !> columns of Q and Z form unitary bases of the corresponding left and + !> right eigenspaces (deflating subspaces). (A, B) must be in + !> generalized Schur canonical form, that is, A and B are both upper + !> triangular. + !> CTGSEN also computes the generalized eigenvalues + !> w(j)= ALPHA(j) / BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, the routine computes estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_ctgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, & + ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: dif(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(sp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2 + real(sp) :: dscale, dsum, rdscal, safmin + complex(sp) :: temp1, temp2 + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,max,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 70 + end if + end if + end do + if( wantp ) then + ! solve generalized sylvester equation for r and l: + ! a11 * r - l * a22 = a12 + ! b11 * r - l * b22 = b12 + n1 = m + n2 = n - m + i = n1 + 1 + call stdlib_clacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_clacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + ijb = 0 + call stdlib_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto + ! left and right eigenspaces + rdscal = zero + dsum = one + call stdlib_classq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_classq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu estimate. + call stdlib_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl estimate. + call stdlib_ctgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_clacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_clacn2( mn2, work( mn2+1 ), work, dif( 1 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_ctgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_clacn2( mn2, work( mn2+1 ), work, dif( 2 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_ctgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_ctgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + ! if b(k,k) is complex, make it real and positive (normalization + ! of the generalized schur form) and store the generalized + ! eigenvalues of reordered pair (a, b) + do k = 1, n + dscale = abs( b( k, k ) ) + if( dscale>safmin ) then + temp1 = conjg( b( k, k ) / dscale ) + temp2 = b( k, k ) / dscale + b( k, k ) = dscale + call stdlib_cscal( n-k, temp1, b( k, k+1 ), ldb ) + call stdlib_cscal( n-k+1, temp1, a( k, k ), lda ) + if( wantq )call stdlib_cscal( n, temp2, q( 1, k ), 1 ) + else + b( k, k ) = cmplx( zero, zero,KIND=sp) + end if + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + end do + 70 continue + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_ctgsen + + !> CTGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B). + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + + pure subroutine stdlib_ctgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: dif(*), s(*) + complex(sp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2 + real(sp) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum + complex(sp) :: yhax, yhbx + ! Local Arrays + complex(sp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl[(a11,b11), (a22, b22)]. + n1 = 1 + n2 = n - n1 + i = n*n + 1 + call stdlib_ctgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ), dummy,1, iwork, ierr ) + end if + end if + end if + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_ctgsna + + !> CTRSEN: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in + !> the leading positions on the diagonal of the upper triangular matrix + !> T, and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + + subroutine stdlib_ctrsen( job, compq, select, n, t, ldt, q, ldq, w, m, s,sep, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, lwork, n + real(sp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + complex(sp), intent(inout) :: q(ldq,*), t(ldt,*) + complex(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, ks, lwmin, n1, n2, nn + real(sp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + real(sp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode and test the input parameters. + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + ! set m to the number of selected eigenvalues. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + n1 = m + n2 = n - m + nn = n1*n2 + info = 0 + lquery = ( lwork==-1 ) + if( wantsp ) then + lwmin = max( 1, 2*nn ) + else if( stdlib_lsame( job, 'N' ) ) then + lwmin = 1 + else if( stdlib_lsame( job, 'E' ) ) then + lwmin = max( 1, nn ) + end if + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt CUNBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines CUNBDB2, CUNBDB3, and CUNBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < q .or. m-p < q ) then + info = -2 + else if( q < 0 .or. m-q < q ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-2 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB1', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., q of x11 and x21 + do i = 1, q + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + theta(i) = atan2( real( x21(i,i),KIND=sp), real( x11(i,i),KIND=sp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i) = cone + x21(i,i) = cone + call stdlib_clarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + call stdlib_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + if( i < q ) then + call stdlib_csrot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) + call stdlib_clacgv( q-i, x21(i,i+1), ldx21 ) + call stdlib_clarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) + s = real( x21(i,i+1),KIND=sp) + x21(i,i+1) = cone + call stdlib_clarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & + ldx11, work(ilarf) ) + call stdlib_clarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & + ldx21, work(ilarf) ) + call stdlib_clacgv( q-i, x21(i,i+1), ldx21 ) + c = sqrt( stdlib_scnrm2( p-i, x11(i+1,i+1), 1 )**2+ stdlib_scnrm2( m-p-i, x21(i+& + 1,i+1), 1 )**2 ) + phi(i) = atan2( s, c ) + call stdlib_cunbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1,x21(i+1,i+1), 1, x11(i+1,& + i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) + end if + end do + return + end subroutine stdlib_cunbdb1 + + !> CUNBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines CUNBDB1, CUNBDB3, and CUNBDB4 handle cases in + !> which P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < 0 .or. p > m-p ) then + info = -2 + else if( q < 0 .or. q < p .or. m-q < p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB2', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., p of x11 and x21 + do i = 1, p + if( i > 1 ) then + call stdlib_csrot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) + end if + call stdlib_clacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_clarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + c = real( x11(i,i),KIND=sp) + x11(i,i) = cone + call stdlib_clarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_clarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & + work(ilarf) ) + call stdlib_clacgv( q-i+1, x11(i,i), ldx11 ) + s = sqrt( stdlib_scnrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_scnrm2( m-p-i+1, x21(i,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_cunbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1, x21(i,i), 1,x11(i+1,i+1), & + ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_cscal( p-i, cnegone, x11(i+1,i), 1 ) + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + if( i < p ) then + call stdlib_clarfgp( p-i, x11(i+1,i), x11(i+2,i), 1, taup1(i) ) + phi(i) = atan2( real( x11(i+1,i),KIND=sp), real( x21(i,i),KIND=sp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x11(i+1,i) = cone + call stdlib_clarf( 'L', p-i, q-i, x11(i+1,i), 1, conjg(taup1(i)),x11(i+1,i+1), & + ldx11, work(ilarf) ) + end if + x21(i,i) = cone + call stdlib_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + ! reduce the bottom-right portion of x21 to the identity matrix + do i = p + 1, q + call stdlib_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + x21(i,i) = cone + call stdlib_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + return + end subroutine stdlib_cunbdb2 + + !> CUNBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines CUNBDB1, CUNBDB2, and CUNBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( 2*p < m .or. p > m ) then + info = -2 + else if( q < m-p .or. m-q < m-p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB3', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., m-p of x11 and x21 + do i = 1, m-p + if( i > 1 ) then + call stdlib_csrot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) + end if + call stdlib_clacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_clarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + s = real( x21(i,i),KIND=sp) + x21(i,i) = cone + call stdlib_clarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_clarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_clacgv( q-i+1, x21(i,i), ldx21 ) + c = sqrt( stdlib_scnrm2( p-i+1, x11(i,i), 1 )**2+ stdlib_scnrm2( m-p-i, x21(i+1,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_cunbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1, x21(i+1,i), 1,x11(i,i+1), & + ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + if( i < m-p ) then + call stdlib_clarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1, taup2(i) ) + phi(i) = atan2( real( x21(i+1,i),KIND=sp), real( x11(i,i),KIND=sp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x21(i+1,i) = cone + call stdlib_clarf( 'L', m-p-i, q-i, x21(i+1,i), 1, conjg(taup2(i)),x21(i+1,i+1), & + ldx21, work(ilarf) ) + end if + x11(i,i) = cone + call stdlib_clarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + ! reduce the bottom-right portion of x11 to the identity matrix + do i = m-p + 1, q + call stdlib_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + x11(i,i) = cone + call stdlib_clarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + return + end subroutine stdlib_cunbdb3 + + !> CUNBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines CUNBDB1, CUNBDB2, and CUNBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + phantom, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + complex(sp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < m-q .or. m-p < m-q ) then + info = -2 + else if( q < m-q .or. q > m ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( q-1, p-1, m-p-1 ) + iorbdb5 = 2 + lorbdb5 = q + lworkopt = ilarf + llarf - 1 + lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNBDB4', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., m-q of x11 and x21 + do i = 1, m-q + if( i == 1 ) then + do j = 1, m + phantom(j) = czero + end do + call stdlib_cunbdb5( p, m-p, q, phantom(1), 1, phantom(p+1), 1,x11, ldx11, x21, & + ldx21, work(iorbdb5),lorbdb5, childinfo ) + call stdlib_cscal( p, cnegone, phantom(1), 1 ) + call stdlib_clarfgp( p, phantom(1), phantom(2), 1, taup1(1) ) + call stdlib_clarfgp( m-p, phantom(p+1), phantom(p+2), 1, taup2(1) ) + theta(i) = atan2( real( phantom(1),KIND=sp), real( phantom(p+1),KIND=sp) ) + + c = cos( theta(i) ) + s = sin( theta(i) ) + phantom(1) = cone + phantom(p+1) = cone + call stdlib_clarf( 'L', p, q, phantom(1), 1, conjg(taup1(1)), x11,ldx11, work(& + ilarf) ) + call stdlib_clarf( 'L', m-p, q, phantom(p+1), 1, conjg(taup2(1)),x21, ldx21, & + work(ilarf) ) + else + call stdlib_cunbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1,x21(i,i-1), 1, x11(i,i)& + , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) + call stdlib_cscal( p-i+1, cnegone, x11(i,i-1), 1 ) + call stdlib_clarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1, taup1(i) ) + call stdlib_clarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1,taup2(i) ) + theta(i) = atan2( real( x11(i,i-1),KIND=sp), real( x21(i,i-1),KIND=sp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i-1) = cone + x21(i,i-1) = cone + call stdlib_clarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1,conjg(taup1(i)), x11(i,i), & + ldx11, work(ilarf) ) + call stdlib_clarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1,conjg(taup2(i)), x21(i,i), & + ldx21, work(ilarf) ) + end if + call stdlib_csrot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) + call stdlib_clacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_clarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + c = real( x21(i,i),KIND=sp) + x21(i,i) = cone + call stdlib_clarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_clarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_clacgv( q-i+1, x21(i,i), ldx21 ) + if( i < m-q ) then + s = sqrt( stdlib_scnrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_scnrm2( m-p-i, x21(i+1,& + i), 1 )**2 ) + phi(i) = atan2( s, c ) + end if + end do + ! reduce the bottom-right portion of x11 to [ i 0 ] + do i = m - q + 1, p + call stdlib_clacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_clarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + x11(i,i) = cone + call stdlib_clarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_clarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & + work(ilarf) ) + call stdlib_clacgv( q-i+1, x11(i,i), ldx11 ) + end do + ! reduce the bottom-right portion of x21 to [ 0 i ] + do i = p + 1, q + call stdlib_clacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + call stdlib_clarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) + + x21(m-q+i-p,i) = cone + call stdlib_clarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& + , ldx21, work(ilarf) ) + call stdlib_clacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + end do + return + end subroutine stdlib_cunbdb4 + + !> CUNCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + + subroutine stdlib_cuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q + integer(ilp), intent(in) :: lrwork + integer(ilp) :: lrworkmin, lrworkopt + ! Array Arguments + real(sp), intent(out) :: rwork(*) + real(sp), intent(out) :: theta(*) + complex(sp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) + complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & + lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & + r + logical(lk) :: lquery, wantu1, wantu2, wantv1t + ! Local Arrays + real(sp) :: dum(1) + complex(sp) :: cdum(1,1) + ! Intrinsic Function + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + lquery = ( lwork==-1 ) .or. ( lrwork==-1 ) + if( m < 0 ) then + info = -4 + else if( p < 0 .or. p > m ) then + info = -5 + else if( q < 0 .or. q > m ) then + info = -6 + else if( ldx11 < max( 1, p ) ) then + info = -8 + else if( ldx21 < max( 1, m-p ) ) then + info = -10 + else if( wantu1 .and. ldu1 < max( 1, p ) ) then + info = -13 + else if( wantu2 .and. ldu2 < max( 1, m - p ) ) then + info = -15 + else if( wantv1t .and. ldv1t < max( 1, q ) ) then + info = -17 + end if + r = min( p, m-p, q, m-q ) + ! compute workspace + ! work layout: + ! |-----------------------------------------| + ! | lworkopt (1) | + ! |-----------------------------------------| + ! | taup1 (max(1,p)) | + ! | taup2 (max(1,m-p)) | + ! | tauq1 (max(1,q)) | + ! |-----------------------------------------| + ! | stdlib_cunbdb work | stdlib_cungqr work | stdlib_cunglq work | + ! | | | | + ! | | | | + ! | | | | + ! | | | | + ! |-----------------------------------------| + ! rwork layout: + ! |------------------| + ! | lrworkopt (1) | + ! |------------------| + ! | phi (max(1,r-1)) | + ! |------------------| + ! | b11d (r) | + ! | b11e (r-1) | + ! | b12d (r) | + ! | b12e (r-1) | + ! | b21d (r) | + ! | b21e (r-1) | + ! | b22d (r) | + ! | b22e (r-1) | + ! | stdlib_cbbcsd rwork | + ! |------------------| + if( info == 0 ) then + iphi = 2 + ib11d = iphi + max( 1, r-1 ) + ib11e = ib11d + max( 1, r ) + ib12d = ib11e + max( 1, r - 1 ) + ib12e = ib12d + max( 1, r ) + ib21d = ib12e + max( 1, r - 1 ) + ib21e = ib21d + max( 1, r ) + ib22d = ib21e + max( 1, r - 1 ) + ib22e = ib22d + max( 1, r ) + ibbcsd = ib22e + max( 1, r - 1 ) + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m-p ) + iorbdb = itauq1 + max( 1, q ) + iorgqr = itauq1 + max( 1, q ) + iorglq = itauq1 + max( 1, q ) + lorgqrmin = 1 + lorgqropt = 1 + lorglqmin = 1 + lorglqopt = 1 + if( r == q ) then + call stdlib_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,dum, cdum, cdum, & + cdum, work, -1,childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_cungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + endif + if( wantu2 .and. m-p > 0 ) then + call stdlib_cungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_cunglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1), -1, childinfo ) + + lorglqmin = max( lorglqmin, q-1 ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_cbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum(1), u1, & + ldu1, u2, ldu2, v1t, ldv1t, cdum,1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == p ) then + call stdlib_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_cungqr( p-1, p-1, p-1, u1(2,2), ldu1, cdum, work(1),-1, childinfo & + ) + lorgqrmin = max( lorgqrmin, p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_cungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_cunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_cbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & + ldv1t, cdum, 1, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == m-p ) then + call stdlib_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_cungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_cungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2, cdum,work(1), -1, & + childinfo ) + lorgqrmin = max( lorgqrmin, m-p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_cunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_cbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& + 1, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else + call stdlib_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, cdum, work(1), -1, childinfo) + lorbdb = m + int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_cungqr( p, p, m-q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_cungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_cunglq( q, q, q, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_cbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & + ldu2, u1, ldu1, cdum, 1, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + end if + lrworkmin = ibbcsd+lbbcsd-1 + lrworkopt = lrworkmin + rwork(1) = lrworkopt + lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) + lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -19 + end if + if( lrwork < lrworkmin .and. .not.lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'CUNCSD2BY1', -info ) + return + else if( lquery ) then + return + end if + lorgqr = lwork-iorgqr+1 + lorglq = lwork-iorglq+1 + ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, + ! in which r = min(p,m-p,q,m-q) + if( r == q ) then + ! case 1: r = q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + v1t(1,1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_clacpy( 'U', q-1, q-1, x21(1,2), ldx21, v1t(2,2),ldv1t ) + call stdlib_cunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglq, childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_cbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & + ldu1, u2, ldu2, v1t, ldv1t, cdum,1, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& + ibbcsd+1, childinfo ) + ! permute rows and columns to place czero submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == p ) then + ! case 2: r = p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + u1(1,1) = cone + do j = 2, p + u1(1,j) = czero + u1(j,1) = czero + end do + call stdlib_clacpy( 'L', p-1, p-1, x11(2,1), ldx11, u1(2,2), ldu1 ) + call stdlib_cungqr( p-1, p-1, p-1, u1(2,2), ldu1, work(itaup1),work(iorgqr), & + lorgqr, childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_clacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) + call stdlib_cunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_cbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& + ldv1t, cdum, 1, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & + lbbcsd,childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == m-p ) then + ! case 3: r = m-p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + u2(1,1) = cone + do j = 2, m-p + u2(1,j) = czero + u2(j,1) = czero + end do + call stdlib_clacpy( 'L', m-p-1, m-p-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_cungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,work(itaup2), work(iorgqr)& + , lorgqr, childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_clacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) + call stdlib_cunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_cbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & + cdum, 1, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > r ) then + do i = 1, r + iwork(i) = q - r + i + end do + do i = r + 1, q + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_clapmt( .false., p, q, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_clapmr( .false., q, q, v1t, ldv1t, iwork ) + end if + end if + else + ! case 4: r = m-q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & + childinfo ) + ! accumulate householder reflectors + if( wantu2 .and. m-p > 0 ) then + call stdlib_ccopy( m-p, work(iorbdb+p), 1, u2, 1 ) + end if + if( wantu1 .and. p > 0 ) then + call stdlib_ccopy( p, work(iorbdb), 1, u1, 1 ) + do j = 2, p + u1(1,j) = czero + end do + call stdlib_clacpy( 'L', p-1, m-q-1, x11(2,1), ldx11, u1(2,2),ldu1 ) + call stdlib_cungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + do j = 2, m-p + u2(1,j) = czero + end do + call stdlib_clacpy( 'L', m-p-1, m-q-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_cungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_clacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) + call stdlib_clacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& + 1), ldv1t ) + call stdlib_clacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) + + call stdlib_cunglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_cbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & + u2, ldu2, u1, ldu1, cdum, 1,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( p > r ) then + do i = 1, r + iwork(i) = p - r + i + end do + do i = r + 1, p + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_clapmt( .false., p, p, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_clapmr( .false., p, q, v1t, ldv1t, iwork ) + end if + end if + end if + return + end subroutine stdlib_cuncsd2by1 + + !> CUNGBR: generates one of the complex unitary matrices Q or P**H + !> determined by CGEBRD when reducing a complex matrix A to bidiagonal + !> form: A = Q * B * P**H. Q and P**H are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + !> is of order N: + !> if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m + !> rows of P**H, where n >= m >= k; + !> if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as + !> an N-by-N matrix. + + pure subroutine stdlib_cungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantq + integer(ilp) :: i, iinfo, j, lwkopt, mn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + wantq = stdlib_lsame( vect, 'Q' ) + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 .or. ( wantq .and. ( n>m .or. nn .or. m=k ) then + call stdlib_cungqr( m, n, k, a, lda, tau, work, -1, iinfo ) + else + if( m>1 ) then + call stdlib_cungqr( m-1, m-1, m-1, a, lda, tau, work, -1,iinfo ) + end if + end if + else + if( k1 ) then + call stdlib_cunglq( n-1, n-1, n-1, a, lda, tau, work, -1,iinfo ) + end if + end if + end if + lwkopt = real( work( 1 ),KIND=sp) + lwkopt = max (lwkopt, mn) + end if + if( info/=0 ) then + call stdlib_xerbla( 'CUNGBR', -info ) + return + else if( lquery ) then + work( 1 ) = lwkopt + return + end if + ! quick return if possible + if( m==0 .or. n==0 ) then + work( 1 ) = 1 + return + end if + if( wantq ) then + ! form q, determined by a call to stdlib_cgebrd to reduce an m-by-k + ! matrix + if( m>=k ) then + ! if m >= k, assume m >= n >= k + call stdlib_cungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) + else + ! if m < k, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! column to the right, and set the first row and column of q + ! to those of the unit matrix + do j = m, 2, -1 + a( 1, j ) = czero + do i = j + 1, m + a( i, j ) = a( i, j-1 ) + end do + end do + a( 1, 1 ) = cone + do i = 2, m + a( i, 1 ) = czero + end do + if( m>1 ) then + ! form q(2:m,2:m) + call stdlib_cungqr( m-1, m-1, m-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + else + ! form p**h, determined by a call to stdlib_cgebrd to reduce a k-by-n + ! matrix + if( k= n, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! row downward, and set the first row and column of p**h to + ! those of the unit matrix + a( 1, 1 ) = cone + do i = 2, n + a( i, 1 ) = czero + end do + do j = 2, n + do i = j - 1, 2, -1 + a( i, j ) = a( i-1, j ) + end do + a( 1, j ) = czero + end do + if( n>1 ) then + ! form p**h(2:n,2:n) + call stdlib_cunglq( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cungbr + + !> CUNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal + !> columns, which are the first N columns of a product of comlpex unitary + !> matrices of order M which are returned by CLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for CLATSQR. + + pure subroutine stdlib_cungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(in) :: t(ldt,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + lquery = lwork==-1 + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. m If VECT = 'Q', CUNMBR: overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'C': P**H * C C * P**H + !> Here Q and P**H are the unitary matrices determined by CGEBRD when + !> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + !> and P**H are defined as products of elementary reflectors H(i) and + !> G(i) respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the unitary matrix Q or P**H that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + + pure subroutine stdlib_cunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), c(ldc,*) + complex(sp), intent(in) :: tau(*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: applyq, left, lquery, notran + character :: transt + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + applyq = stdlib_lsame( vect, 'Q' ) + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q or p and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( k<0 ) then + info = -6 + else if( ( applyq .and. lda0 .and. n>0 ) then + if( applyq ) then + if( left ) then + nb = stdlib_ilaenv( 1, 'CUNMQR', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'CUNMQR', side // trans, m, n-1, n-1,-1 ) + end if + else + if( left ) then + nb = stdlib_ilaenv( 1, 'CUNMLQ', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'CUNMLQ', side // trans, m, n-1, n-1,-1 ) + end if + end if + lwkopt = nw*nb + else + lwkopt = 1 + end if + work( 1 ) = lwkopt + end if + if( info/=0 ) then + call stdlib_xerbla( 'CUNMBR', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( m==0 .or. n==0 )return + if( applyq ) then + ! apply q + if( nq>=k ) then + ! q was determined by a call to stdlib_cgebrd with nq >= k + call stdlib_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & + ) + else if( nq>1 ) then + ! q was determined by a call to stdlib_cgebrd with nq < k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_cunmqr( side, trans, mi, ni, nq-1, a( 2, 1 ), lda, tau,c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + else + ! apply p + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + if( nq>k ) then + ! p was determined by a call to stdlib_cgebrd with nq > k + call stdlib_cunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & + iinfo ) + else if( nq>1 ) then + ! p was determined by a call to stdlib_cgebrd with nq <= k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_cunmlq( side, transt, mi, ni, nq-1, a( 1, 2 ), lda,tau, c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_cunmbr + + !> CGELQ: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_cgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'CGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'CGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_cgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_claswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_cgelq + + !> CGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_cgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), s(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & + maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz + real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: int,log,max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + smlsiz = stdlib_ilaenv( 9, 'CGELSD', ' ', 0, 0, 0, 0 ) + mnthr = stdlib_ilaenv( 6, 'CGELSD', ' ', m, n, nrhs, -1 ) + nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1,KIND=sp) ) /log( & + two ),KIND=ilp) + 1, 0 ) + liwork = 3*minmn*nlvl + 11*minmn + mm = m + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns. + mm = n + maxwrk = max( maxwrk, n*stdlib_ilaenv( 1, 'CGEQRF', ' ', m, n,-1, -1 ) ) + + maxwrk = max( maxwrk, nrhs*stdlib_ilaenv( 1, 'CUNMQR', 'LC', m,nrhs, n, -1 ) ) + + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + lrwork = 10*n + 2*n*smlsiz + 8*n*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + maxwrk = max( maxwrk, 2*n + ( mm + n )*stdlib_ilaenv( 1,'CGEBRD', ' ', mm, n, & + -1, -1 ) ) + maxwrk = max( maxwrk, 2*n + nrhs*stdlib_ilaenv( 1, 'CUNMBR','QLC', mm, nrhs, & + n, -1 ) ) + maxwrk = max( maxwrk, 2*n + ( n - 1 )*stdlib_ilaenv( 1,'CUNMBR', 'PLN', n, & + nrhs, n, -1 ) ) + maxwrk = max( maxwrk, 2*n + n*nrhs ) + minwrk = max( 2*n + mm, 2*n + n*nrhs ) + end if + if( n>m ) then + lrwork = 10*m + 2*m*smlsiz + 8*m*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'CGELQF', ' ', m, n, -1,-1 ) + maxwrk = max( maxwrk, m*m + 4*m + 2*m*stdlib_ilaenv( 1,'CGEBRD', ' ', m, m,& + -1, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + nrhs*stdlib_ilaenv( 1,'CUNMBR', 'QLC', m,& + nrhs, m, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + ( m - 1 )*stdlib_ilaenv( 1,'CUNMLQ', & + 'LC', n, nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m*m + 4*m + m*nrhs ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - underdetermined. + maxwrk = 2*m + ( n + m )*stdlib_ilaenv( 1, 'CGEBRD', ' ', m,n, -1, -1 ) + + maxwrk = max( maxwrk, 2*m + nrhs*stdlib_ilaenv( 1, 'CUNMBR','QLC', m, nrhs,& + m, -1 ) ) + maxwrk = max( maxwrk, 2*m + m*stdlib_ilaenv( 1, 'CUNMBR','PLN', n, nrhs, m,& + -1 ) ) + maxwrk = max( maxwrk, 2*m + m*nrhs ) + end if + minwrk = max( 2*m + n, 2*m + m*nrhs ) + end if + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_slaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_clange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_clascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure b(m+1:n,:) = 0 + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (rworkspace: need n) + ! (cworkspace: need n, prefer n*nb) + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (rworkspace: need n) + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + end if + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ie = 1 + nrwork = ie + n + ! bidiagonalize r in a. + ! (rworkspace: need n) + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + call stdlib_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + call stdlib_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_clalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_cunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (cworkspace: need 2*m, prefer m+m*nb) + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_clacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + itauq = il + ldwork*m + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize l in work(il). + ! (rworkspace: need m) + ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) + call stdlib_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_clalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_cunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize a. + ! (rworkspace: need m) + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_clalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_cunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_clascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_clascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_clascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_clascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + return + end subroutine stdlib_cgelsd + + !> CGELSS: computes the minimum norm solution to a complex linear + !> least squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + real(sp), intent(out) :: rwork(*), s(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_cgeqrf, lwork_cunmqr, lwork_cgebrd, lwork_cunmbr, lwork_cungbr, & + lwork_cunmlq, lwork_cgelqf + real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + complex(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'CGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_cgeqrf + call stdlib_cgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_cgeqrf = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cunmqr + call stdlib_cunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_cunmqr = real( dum(1),KIND=sp) + mm = n + maxwrk = max( maxwrk, n + n*stdlib_ilaenv( 1, 'CGEQRF', ' ', m,n, -1, -1 ) ) + + maxwrk = max( maxwrk, n + nrhs*stdlib_ilaenv( 1, 'CUNMQR', 'LC',m, nrhs, n, -& + 1 ) ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute space needed for stdlib_cgebrd + call stdlib_cgebrd( mm, n, a, lda, s, s, dum(1), dum(1), dum(1),-1, info ) + + lwork_cgebrd = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cunmbr + call stdlib_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_cunmbr = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cungbr + call stdlib_cungbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_cungbr = real( dum(1),KIND=sp) + ! compute total workspace needed + maxwrk = max( maxwrk, 2*n + lwork_cgebrd ) + maxwrk = max( maxwrk, 2*n + lwork_cunmbr ) + maxwrk = max( maxwrk, 2*n + lwork_cungbr ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = 2*n + max( nrhs, m ) + end if + if( n>m ) then + minwrk = 2*m + max( nrhs, n ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_cgelqf + call stdlib_cgelqf( m, n, a, lda, dum(1), dum(1),-1, info ) + lwork_cgelqf = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cgebrd + call stdlib_cgebrd( m, m, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_cgebrd = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cunmbr + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_cunmbr = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cungbr + call stdlib_cungbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_cungbr = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cunmlq + call stdlib_cunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_cunmlq = real( dum(1),KIND=sp) + ! compute total workspace needed + maxwrk = m + lwork_cgelqf + maxwrk = max( maxwrk, 3*m + m*m + lwork_cgebrd ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_cunmbr ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_cungbr ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_cunmlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_cgebrd + call stdlib_cgebrd( m, n, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_cgebrd = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cunmbr + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_cunmbr = real( dum(1),KIND=sp) + ! compute space needed for stdlib_cungbr + call stdlib_cungbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_cungbr = real( dum(1),KIND=sp) + maxwrk = 2*m + lwork_cgebrd + maxwrk = max( maxwrk, 2*m + lwork_cunmbr ) + maxwrk = max( maxwrk, 2*m + lwork_cungbr ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_slaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) + ! (rworkspace: none) + call stdlib_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + ! (rworkspace: none) + call stdlib_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_csrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_claset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_cgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_cgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_clacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_cgemv( 'C', n, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_ccopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=3*m+m*m+max( m, nrhs, n-2*m ) )then + ! underdetermined case, m much less than n + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=3*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: none) + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + ie = 1 + itauq = il + ldwork*m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) + ! (rworkspace: none) + call stdlib_cungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right singular + ! vectors of l in work(il) and multiplying b by transpose of + ! left singular vectors + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, 0, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & + b, ldb, rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_csrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_claset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + iwork = il + m*ldwork + ! multiply b by right singular vectors of l in work(il) + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_cgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & + work( iwork ), ldb ) + call stdlib_clacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_cgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1, i ), & + ldb, czero, work( iwork ), m ) + call stdlib_clacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_cgemv( 'C', m, m, cone, work( il ), ldwork, b( 1, 1 ),1, czero, work(& + iwork ), 1 ) + call stdlib_ccopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) + ! (rworkspace: none) + call stdlib_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: none) + call stdlib_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'L', m, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_csrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_claset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_cgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_cgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_clacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_cgemv( 'C', m, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_ccopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_clascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_clascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_clascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_clascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_cgelss + + !> CGELSY: computes the minimum-norm solution to a complex linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by unitary transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**H [ inv(T11)*Q1**H*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + + subroutine stdlib_cgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & + nb4 + real(sp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize + complex(sp) :: c1, c2, s1, s2 + ! Intrinsic Functions + intrinsic :: abs,max,min,real,cmplx + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + nb1 = stdlib_ilaenv( 1, 'CGEQRF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'CGERQF', ' ', m, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'CUNMQR', ' ', m, n, nrhs, -1 ) + nb4 = stdlib_ilaenv( 1, 'CUNMRQ', ' ', m, n, nrhs, -1 ) + nb = max( nb1, nb2, nb3, nb4 ) + lwkopt = max( 1, mn+2*n+nb*(n+1), 2*mn+nb*nrhs ) + work( 1 ) = cmplx( lwkopt,KIND=sp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_clange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_cgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, rwork, info ) + + wsize = mn + real( work( mn+1 ),KIND=sp) + ! complex workspace: mn+nb*(n+1). real workspace 2*n. + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = cone + work( ismax ) = cone + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank CGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by short wide + !> LQ factorization (CGELQ) + + pure subroutine stdlib_cgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), t(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_cgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_clamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = real( lw,KIND=sp) + return + end subroutine stdlib_cgemlq + + !> CGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (CGEQR) + + pure subroutine stdlib_cgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(sp), intent(in) :: a(lda,*), t(*) + complex(sp), intent(inout) :: c(ldc,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_cgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_cgemqr + + !> CGEQR: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_cgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'CGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'CGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_cgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_clatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_cgeqr + + !> CGESDD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors, by using divide-and-conquer method. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**H, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_cgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), s(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & + iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& + nwork, wrkbl + integer(ilp) :: lwork_cgebrd_mn, lwork_cgebrd_mm, lwork_cgebrd_nn, lwork_cgelqf_mn, & + lwork_cgeqrf_mn, lwork_cungbr_p_mn, lwork_cungbr_p_nn, lwork_cungbr_q_mn, & + lwork_cungbr_q_mm, lwork_cunglq_mn, lwork_cunglq_nn, lwork_cungqr_mm, lwork_cungqr_mn, & + lwork_cunmbr_prc_mm, lwork_cunmbr_qln_mm, lwork_cunmbr_prc_mn, lwork_cunmbr_qln_mn, & + lwork_cunmbr_prc_nn, lwork_cunmbr_qln_nn + real(sp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dum(1) + complex(sp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + mnthr1 = int( minmn*17.0_sp / 9.0_sp,KIND=ilp) + mnthr2 = int( minmn*5.0_sp / 3.0_sp,KIND=ilp) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + minwrk = 1 + maxwrk = 1 + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_sbdsdc,KIND=sp) is + ! bdspac = 3*n*n + 4*n for singular values and vectors; + ! bdspac = 4*n for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_cgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_cgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_cgebrd( n, n, cdum(1), n, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_cgebrd_nn = int( cdum(1),KIND=ilp) + call stdlib_cgeqrf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_cgeqrf_mn = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'P', n, n, n, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'Q', m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_q_mn = int( cdum(1),KIND=ilp) + call stdlib_cungqr( m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungqr_mm = int( cdum(1),KIND=ilp) + call stdlib_cungqr( m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungqr_mn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_cunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_cunmbr_qln_mm = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'Q', 'L', 'N', m, n, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_cunmbr_qln_mn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'Q', 'L', 'N', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_cunmbr_qln_nn = int( cdum(1),KIND=ilp) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + maxwrk = n + lwork_cgeqrf_mn + maxwrk = max( maxwrk, 2*n + lwork_cgebrd_nn ) + minwrk = 3*n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_cgeqrf_mn + wrkbl = max( wrkbl, n + lwork_cungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn ) + maxwrk = m*n + n*n + wrkbl + minwrk = 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_cgeqrf_mn + wrkbl = max( wrkbl, n + lwork_cungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_cgeqrf_mn + wrkbl = max( wrkbl, n + lwork_cungqr_mm ) + wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + max( 3*n, n + m ) + end if + else if( m>=mnthr2 ) then + ! path 5 (m >> n, but not as much as mnthr1) + maxwrk = 2*n + lwork_cgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 5o (m >> n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mn ) + else if( wntqa ) then + ! path 5a (m >> n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mm ) + end if + else + ! path 6 (m >= n, but not much larger) + maxwrk = 2*n + lwork_cgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 6o (m >= n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn ) + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mn ) + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn ) + else if( wntqa ) then + ! path 6a (m >= n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn ) + end if + end if + else if( minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_sbdsdc,KIND=sp) is + ! bdspac = 3*m*m + 4*m for singular values and vectors; + ! bdspac = 4*m for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_cgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_cgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_cgebrd( m, m, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_cgebrd_mm = int( cdum(1),KIND=ilp) + call stdlib_cgelqf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_cgelqf_mn = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'P', m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_p_mn = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'P', n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_cunglq( m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_cunglq_mn = int( cdum(1),KIND=ilp) + call stdlib_cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_cunglq_nn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'P', 'R', 'C', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_cunmbr_prc_mm = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'P', 'R', 'C', m, n, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_cunmbr_prc_mn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, m, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_cunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_cunmbr_qln_mm = int( cdum(1),KIND=ilp) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + maxwrk = m + lwork_cgelqf_mn + maxwrk = max( maxwrk, 2*m + lwork_cgebrd_mm ) + minwrk = 3*m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_cgelqf_mn + wrkbl = max( wrkbl, m + lwork_cunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm ) + maxwrk = m*n + m*m + wrkbl + minwrk = 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_cgelqf_mn + wrkbl = max( wrkbl, m + lwork_cunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_cgelqf_mn + wrkbl = max( wrkbl, m + lwork_cunglq_nn ) + wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + max( 3*m, m + n ) + end if + else if( n>=mnthr2 ) then + ! path 5t (n >> m, but not as much as mnthr1) + maxwrk = 2*m + lwork_cgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 5to (n >> m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_mn ) + else if( wntqa ) then + ! path 5ta (n >> m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_nn ) + end if + else + ! path 6t (n > m, but not much larger) + maxwrk = 2*m + lwork_cgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 6to (n > m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_mn ) + else if( wntqa ) then + ! path 6ta (n > m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_nn ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + end if + if( info==0 ) then + work( 1 ) = stdlib_sroundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n [tau] + n [work] + ! cworkspace: prefer n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out below r + call stdlib_claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + n + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + ir = iu + ldwrku*n + if( lwork >= m*n + n*n + 3*n ) then + ! work(ir) is m by n + ldwrkr = m + else + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work( ir ), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of r in work(iru) and computing right singular vectors + ! of r in work(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by the right singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! cworkspace: need n*n [u] + n*n [r] + ! cworkspace: prefer n*n [u] + m*n [r] + ! rworkspace: need 0 + do i = 1, m, ldwrkr + chunk = min( m-i+1, ldwrkr ) + call stdlib_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( iu ), & + ldwrku, czero,work( ir ), ldwrkr ) + call stdlib_clacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! cworkspace: need n*n [r] + ! rworkspace: need 0 + call stdlib_clacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & + u, ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! cworkspace: need n*n [u] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! cworkspace: need n*n [u] + n [tau] + m [work] + ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ! produce r in a, zeroing out below it + call stdlib_claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_cunmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! cworkspace: need n*n [u] + ! rworkspace: need 0 + call stdlib_cgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & + a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + end if + else if( m>=mnthr2 ) then + ! mnthr2 <= m < mnthr1 + ! path 5 (m >> n, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_cungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >> n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1,dum,1,dum, idum, & + rwork( nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + ! path 5o (m >> n, jobz='o') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work(iu) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, + ! storing the result in work(iu), copying to vt + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_clarcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & + rwork( nrwork ) ) + call stdlib_clacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) + ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_clacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),n, work( iu ), & + ldwrku, rwork( nrwork ) ) + call stdlib_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + else + ! path 5a (m >> n, jobz='a') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr2 + ! path 6 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ! use stdlib_cunmbr to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6n (m >= n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work( iu ) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! path 6o (m >= n, jobz='o') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! path 6o-fast + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of a, copying + ! to a + ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + call stdlib_claset( 'F', m, n, czero, czero, work( iu ),ldwrku ) + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_clacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 6o-slow + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_clacrm( chunk, n, a( i, 1 ), lda,rwork( iru ), n, work( iu )& + , ldwrku,rwork( nrwork ) ) + call stdlib_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_claset( 'F', m, n, czero, czero, u, ldu ) + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! set the right corner of u to identity matrix + call stdlib_claset( 'F', m, m, czero, czero, u, ldu ) + if( m>n ) then + call stdlib_claset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) + end if + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m [tau] + m [work] + ! cworkspace: prefer m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out above l + call stdlib_claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + m + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ldwkvt = m + ! work(ivt) is m by m + il = ivt + ldwkvt*m + if( lwork >= m*n + m*m + 3*m ) then + ! work(il) m by n + ldwrkl = m + chunk = n + else + ! work(il) is m by chunk + ldwrkl = m + chunk = ( lwork - m*m - 3*m ) / m + end if + itau = il + ldwrkl*chunk + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_clacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) + ! overwrite work(ivt) by the right singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(il) by q + ! in a, storing result in work(il) and copying to a + ! cworkspace: need m*m [vt] + m*m [l] + ! cworkspace: prefer m*m [vt] + m*n [l] + ! rworkspace: need 0 + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_cgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1, i ), & + lda, czero, work( il ),ldwrkl ) + call stdlib_clacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! copy vt to work(il), multiply right singular vectors of l + ! in work(il) by q in a, storing result in vt + ! cworkspace: need m*m [l] + ! rworkspace: need 0 + call stdlib_clacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! cworkspace: need m*m [vt] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! cworkspace: need m*m [vt] + m [tau] + n [work] + ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& + nwork+1, ierr ) + ! produce l in a, zeroing out above it + call stdlib_claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_cunmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! cworkspace: need m*m [vt] + ! rworkspace: need 0 + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else if( n>=mnthr2 ) then + ! mnthr2 <= n < mnthr1 + ! path 5t (n >> m, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_cungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n >> m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + ivt = nwork + ! path 5to (n >> m, jobz='o') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& + nwork+1, ierr ) + ldwkvt = m + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + nwork = ivt + ldwkvt*n + chunk = n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(irvt,KIND=sp) + ! storing the result in work(ivt), copying to u + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_clacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & + nrwork ) ) + call stdlib_clacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) + ! multiply rwork(irvt) by p**h in a, storing the + ! result in work(ivt), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_clarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,work( ivt ), & + ldwkvt, rwork( nrwork ) ) + call stdlib_clacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! path 5ta (n >> m, jobz='a') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr2 + ! path 6t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ! use stdlib_cunmbr to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6tn (n > m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 6to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + call stdlib_claset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*m ) then + ! path 6to-fast + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of a, + ! copying to a + ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + + call stdlib_cunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_clacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 6to-slow + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need 0 + call stdlib_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_clarcm( m, blk, rwork( irvt ), m, a( 1, i ),lda, work( ivt )& + , ldwkvt,rwork( nrwork ) ) + call stdlib_clacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_claset( 'F', m, n, czero, czero, vt, ldvt ) + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! set all of vt to identity matrix + call stdlib_claset( 'F', n, n, czero, cone, vt, ldvt ) + ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_cunmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm CGESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_cgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda CGESVD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**H, not V. + + subroutine stdlib_cgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), s(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_cgeqrf, lwork_cungqr_n, lwork_cungqr_m, lwork_cgebrd, & + lwork_cungbr_p, lwork_cungbr_q, lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m + real(sp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(sp) :: dum(1) + complex(sp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! space needed for stdlib_zbdsqr is bdspac = 5*n + mnthr = stdlib_ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_cgeqrf + call stdlib_cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_cgeqrf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cungqr + call stdlib_cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_cungqr_n = int( cdum(1),KIND=ilp) + call stdlib_cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_cungqr_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cgebrd + call stdlib_cgebrd( n, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_cgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cungbr + call stdlib_cungbr( 'P', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_p = int( cdum(1),KIND=ilp) + call stdlib_cungbr( 'Q', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_q = int( cdum(1),KIND=ilp) + mnthr = stdlib_ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 ) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_cgeqrf + maxwrk = max( maxwrk, 2*n+lwork_cgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2*n+lwork_cungbr_p ) + minwrk = 3*n + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_p ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_cgeqrf + wrkbl = max( wrkbl, n+lwork_cungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_cungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_cgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_cgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*n + lwork_cgebrd + if( wntus .or. wntuo ) then + call stdlib_cungbr( 'Q', m, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_cungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_cungbr_q ) + end if + if( wntua ) then + call stdlib_cungbr( 'Q', m, m, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_cungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_cungbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 2*n+lwork_cungbr_p ) + end if + minwrk = 2*n + m + end if + else if( minmn>0 ) then + ! space needed for stdlib_cbdsqr is bdspac = 5*m + mnthr = stdlib_ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_cgelqf + call stdlib_cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_cgelqf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cunglq + call stdlib_cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,ierr ) + lwork_cunglq_n = int( cdum(1),KIND=ilp) + call stdlib_cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_cunglq_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cgebrd + call stdlib_cgebrd( m, m, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_cgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cungbr p + call stdlib_cungbr( 'P', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_p = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_cungbr q + call stdlib_cungbr( 'Q', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_q = int( cdum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_cgelqf + maxwrk = max( maxwrk, 2*m+lwork_cgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2*m+lwork_cungbr_q ) + minwrk = 3*m + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_q ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_cgelqf + wrkbl = max( wrkbl, m+lwork_cunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_cgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_cungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_cgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_cgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*m + lwork_cgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_cungbr p + call stdlib_cungbr( 'P', m, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_cungbr_p ) + end if + if( wntva ) then + call stdlib_cungbr( 'P', n, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_cungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_cungbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 2*m+lwork_cungbr_q ) + end if + minwrk = 2*m + n + end if + end if + maxwrk = max( minwrk, maxwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: need 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + irwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_clacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: need 0) + call stdlib_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: n) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (cworkspace: need 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_claset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_claset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: n) + call stdlib_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, cdum,& + 1, rwork( irwork ),info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + ! copy right singular vectors of r to a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_claset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_claset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: need 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_clacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_claset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) + ! (rworkspace: 0) + call stdlib_clacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_clacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + irwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_clacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( ir ), ldwrkr, u, & + ldu, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n)) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,u, ldu, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_claset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_claset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_claset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_claset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_clacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_claset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: m) + call stdlib_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_clacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) + ! (rworkspace: 0) + call stdlib_clacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm CGESVDQ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_cgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lcwork + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) + real(sp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: ierr, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_cgelqf, lwrk_cgesvd, lwrk_cgesvd2, lwrk_cgeqp3, & + lwrk_cgeqrf, lwrk_cunmlq, lwrk_cunmqr, lwrk_cunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(sp) :: big, epsln, rtmp, sconda, sfmin + complex(sp) :: ctmp + ! Local Arrays + complex(sp) :: cdummy(1) + real(sp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,real,sqrt + ! Executable Statements + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + iminwrk = max( 1, n + m - 1 ) + rminwrk = max( 2, m, 5*n ) + else + iminwrk = max( 1, n ) + rminwrk = max( 2, 5*n ) + end if + lquery = (liwork == -1 .or. lcwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=sp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_clascl('G',0,0,sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_claswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_clange( 'M', m, n, a, lda, rwork ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = - 8 + call stdlib_xerbla( 'CGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=sp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_clascl('G',0,0, sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) + + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_slamch('E') + sfmin = stdlib_slamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=sp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_slamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_clacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_scnrm2( p, v(1,p), 1 ) + call stdlib_csscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_cpocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) + + else + call stdlib_cpocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) + + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**h = [a](1:nr,1:n)**h + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + a(q,p) = conjg(a(p,q)) + if ( q <= nr ) a(p,q) = czero + end do + end do + call stdlib_cgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_claset( 'L', nr-1,nr-1, czero,czero, a(2,1), lda ) + + call stdlib_cgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_cgesvd to r**h + ! .. copy r**h into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_claset( 'U', nr-1,nr-1, czero,czero, u(1,2), ldu ) + + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_cgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + else + ! Apply Stdlib_Cgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_clacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_claset( 'L', nr-1, nr-1, czero, czero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_cgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_claset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_claset( 'A',nr,n1-nr,czero,czero,u(1,nr+1), ldu ) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_cgesvd to r**h + ! .. copy r**h into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_cgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_clapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_claset('G', n, n-nr, czero, czero, v(1,nr+1), ldv) + call stdlib_cgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Cgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_clacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_claset( 'L', nr-1, nr-1, czero, czero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_cgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_clapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_claset('G', n-nr, n, czero,czero, v(nr+1,1), ldv) + call stdlib_cgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the adjoint of the matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_cgesvd to r**h [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite [v], the nr right + ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate + ! transposed + call stdlib_cgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + ! Assemble V + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_clapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_claset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'cgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_claset('U',nr-1,nr-1, czero,czero, v(1,2),ldv) + + call stdlib_claset('A',n,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_cgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + u(p,p) = conjg(u(p,p)) + do q = p + 1, n + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_claset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_claset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_claset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**h into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_claset('U',nr-1,nr-1,czero,czero,u(1,nr+2),ldu) + + call stdlib_cgeqrf( n, nr, u(1,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = conjg(u(p,nr+q)) + end do + end do + call stdlib_claset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + call stdlib_cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& + 1),lcwork-n-nr,rwork, info ) + call stdlib_claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_cunmqr('R','C', n, n, nr, u(1,nr+1), ldu,cwork(n+1),v,ldv,& + cwork(n+nr+1),lcwork-n-nr,ierr) + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_claset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_cgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_clacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_claset( 'L', nr-1,nr-1, czero,czero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_cgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_clapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_claset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'cgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_clacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_claset('L', nr-1,nr-1, czero,czero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_claset('A', n-nr,n, czero,czero, v(nr+1,1),ldv) + call stdlib_cgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the adjoint of the matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_claset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_claset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_claset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_clacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_claset('L',nr-1,nr-1,czero,czero,u(nr+2,1),ldu) + + call stdlib_cgelqf( nr, n, u(nr+1,1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + call stdlib_clacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_claset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + + call stdlib_cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& + 1), lcwork-n-nr, rwork, info ) + call stdlib_claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_cunmlq('R','N',n,n,nr,u(nr+1,1),ldu,cwork(n+1),v, ldv, cwork(n+& + nr+1),lcwork-n-nr,ierr) + call stdlib_clapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_claset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + end if + end if + ! .. end of the "r**h or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_slaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_slascl( 'G',0,0, one,sqrt(real(m,KIND=sp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_cgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_cgesvdq + + !> CGESVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_cgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(out) :: berr(*), ferr(*), rwork(*) + real(sp), intent(inout) :: c(*), r(*) + complex(sp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(sp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_clantr( 'M', 'U', 'N', info, info, af, ldaf,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_clange( 'M', n, info, a, lda, rwork ) /rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_clange( norm, n, n, a, lda, rwork ) + rpvgrw = stdlib_clantr( 'M', 'U', 'N', n, n, af, ldaf, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_clange( 'M', n, n, a, lda, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_cgecon( norm, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_clacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_cgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_cgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond CGETSLS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_cgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(sp) :: anrm, bignum, bnrm, smlnum, dum(1) + complex(sp) :: tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'C' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_cgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_cgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_cgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_cgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_claset( 'F', maxmn, nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_clange( 'M', brow, nrhs, b, ldb, dum ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_clascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_cgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_cgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_ctrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_ctrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = czero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_cgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_cgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_ctrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_cgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_cgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_ctrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_clascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_clascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_clascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_clascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=sp) + return + end subroutine stdlib_cgetsls + + !> CGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a complex M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in CGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of CGEQRT for more details on the format. + + pure subroutine stdlib_cgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,real,cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m CGGES: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> CGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_cgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr, work,lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_c) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkmin, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_cggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_cungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_claset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_cgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_chgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: none needed) + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_ctgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_cggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_cggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_clascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_clascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_clascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_clascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_cgges + + !> CGGESX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the complex Schur form (S,T), + !> and, optionally, the left and/or right matrices of Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if T is + !> upper triangular with non-negative diagonal and S is upper + !> triangular. + + subroutine stdlib_cggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, alpha,& + beta, vsl, ldvsl, vsr,ldvsr, rconde, rcondv, work, lwork, rwork,iwork, liwork, bwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rconde(2), rcondv(2), rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_c) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantsb, wantse, & + wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, iright, irows, & + irwrk, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, smlnum + ! Local Arrays + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = 2*n + maxwrk = n*(1 + stdlib_ilaenv( 1, 'CGEQRF', ' ', n, 1, n, 0 ) ) + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'CUNMQR', ' ', n, 1, n, -1 ) ) ) + + if( ilvsl ) then + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'CUNGQR', ' ', n, 1, n, -1 ) ) & + ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 2 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_cggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_cungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_claset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_cgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_chgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 40 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + ! (complex workspace: if ijob >= 1, need max(1, 2*sdim*(n-sdim)) + ! otherwise, need 1 ) + call stdlib_ctgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pl, pr,dif, work( iwrk ), lwork-iwrk+1, iwork, liwork,ierr & + ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-21 ) then + ! not enough complex workspace + info = -21 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_cggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_cggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_clascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_clascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_clascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_clascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 40 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_cggesx + + !> CGGEV: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_cggev( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkmin, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(sp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_cggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (complex workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_cungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_claset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_cgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_cgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_chgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + ! (real workspace: need 2*n) + ! (complex workspace: need 2*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ctgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_cggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp CGGEVX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B) the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> Optionally, it also computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_cggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, ilo, ihi,lscale, rscale, abnrm, bbnrm, rconde, rcondv,work, lwork, rwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(sp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: lscale(*), rconde(*), rcondv(*), rscale(*), rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, wantsb, wantse, wantsn, & + wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(sp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( noscl .or. stdlib_lsame( balanc,'S' ) .or.stdlib_lsame( balanc, 'B' ) ) ) & + then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (real workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_cggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,rwork, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_clange( '1', n, n, a, lda, rwork( 1 ) ) + if( ilascl ) then + rwork( 1 ) = abnrm + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, 1, 1, rwork( 1 ), 1,ierr ) + abnrm = rwork( 1 ) + end if + bbnrm = stdlib_clange( '1', n, n, b, ldb, rwork( 1 ) ) + if( ilbscl ) then + rwork( 1 ) = bbnrm + call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, rwork( 1 ), 1,ierr ) + bbnrm = rwork( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to a + ! (complex workspace: need n, prefer n*nb) + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_cungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_claset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_cgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_cgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_chgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 90 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! stdlib_ctgevc: (complex workspace: need 2*n ) + ! (real workspace: need 2*n ) + ! stdlib_ctgsna: (complex workspace: need 2*n*n if sense='v' or 'b') + ! (integer workspace: need n+2 ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ctgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work( iwrk ), rwork,ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_ctgevc) and estimate condition + ! numbers (stdlib_ctgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to + ! re-calculate eigenvectors and estimate the condition numbers + ! one at a time. + do i = 1, n + do j = 1, n + bwork( j ) = .false. + end do + bwork( i ) = .true. + iwrk = n + 1 + iwrk1 = iwrk + n + if( wantse .or. wantsb ) then + call stdlib_ctgevc( 'B', 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, 1, m,work( iwrk1 ), rwork, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + call stdlib_ctgsna( sense, 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, rconde( i ),rcondv( i ), 1, m, work( iwrk1 ),lwork-iwrk1+1, iwork, & + ierr ) + end do + end if + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_cggbak( balanc, 'L', n, ilo, ihi, lscale, rscale, n, vl,ldvl, ierr ) + + loop_50: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp CHBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. + + subroutine stdlib_chbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, iscale + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_clascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_clascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_chbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + call stdlib_chbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, call stdlib_csteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + indrwk = inde + n + call stdlib_csteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_chbev + + !> CHBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_chbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, rwork, lrwork, & + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indwk2, indwrk, iscale, liwmin, llrwk, llwk2, & + lrwmin, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 .or. lrwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else + if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_clascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_clascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_chbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + indwk2 = 1 + n*n + llwk2 = lwork - indwk2 + 1 + llrwk = lrwork - indwrk + 1 + call stdlib_chbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, call stdlib_cstedc. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + call stdlib_cstedc( 'I', n, w, rwork( inde ), work, n, work( indwk2 ),llwk2, rwork( & + indwrk ), llrwk, iwork, liwork,info ) + call stdlib_cgemm( 'N', 'N', n, n, n, cone, z, ldz, work, n, czero,work( indwk2 ), & + n ) + call stdlib_clacpy( 'A', n, n, work( indwk2 ), n, z, ldz ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_chbevd + + !> CHBEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian band matrix A. Eigenvalues and eigenvectors + !> can be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_chbevx( jobz, range, uplo, n, kd, ab, ldab, q, ldq, vl,vu, il, iu, abstol, & + m, w, z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, kd, ldab, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*) + complex(sp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indwrk, iscale, itmp1, j, jj, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + complex(sp) :: ctmp1 + ! Intrinsic Functions + intrinsic :: max,min,real,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=tmp1 ) )m = 0 + end if + if( m==1 ) then + w( 1 ) = real( ctmp1,KIND=sp) + if( wantz )z( 1, 1 ) = cone + end if + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if ( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + endif + anrm = stdlib_clanhb( 'M', uplo, n, kd, ab, ldab, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_clascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_clascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_chbtrd to reduce hermitian band matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indwrk = 1 + call stdlib_chbtrd( jobz, uplo, n, kd, ab, ldab, rwork( indd ),rwork( inde ), q, ldq, & + work( indwrk ), iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_ssterf or stdlib_csteqr. if this fails for some + ! eigenvalue, then try stdlib_sstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_scopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_ssterf( n, w, rwork( indee ), info ) + else + call stdlib_clacpy( 'A', n, n, q, ldq, z, ldz ) + call stdlib_scopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_csteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_cstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_cstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_cstein. + do j = 1, m + call stdlib_ccopy( n, z( 1, j ), 1, work( 1 ), 1 ) + call stdlib_cgemv( 'N', n, n, cone, q, ldq, work, 1, czero,z( 1, j ), 1 ) + end do + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) CHBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. + + pure subroutine stdlib_chbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, n + ! Array Arguments + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwrk + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab CHBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_chbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, rwork, lrwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwk2, indwrk, liwmin, llrwk, llwk2, lrwmin, & + lwmin + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1+n + lrwmin = 1+n + liwmin = 1 + else if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab CHBGVX: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. Eigenvalues and + !> eigenvectors can be selected by specifying either all eigenvalues, + !> a range of values or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_chbgvx( jobz, range, uplo, n, ka, kb, ab, ldab, bb,ldbb, q, ldq, vl, & + vu, il, iu, abstol, m, w, z,ldz, work, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ka, kb, ldab, ldbb, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(sp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, upper, valeig, wantz + character :: order, vect + integer(ilp) :: i, iinfo, indd, inde, indee, indibl, indisp, indiwk, indrwk, indwrk, & + itmp1, j, jj, nsplit + real(sp) :: tmp1 + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ka<0 ) then + info = -5 + else if( kb<0 .or. kb>ka ) then + info = -6 + else if( ldab0 .and. vu<=vl )info = -14 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -15 + else if ( iun ) then + info = -16 + end if + end if + end if + if( info==0) then + if( ldz<1 .or. ( wantz .and. ldz CHEEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_cheevd( jobz, uplo, n, a, lda, w, work, lwork, rwork,lrwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwk2, indwrk, iscale, liopt, & + liwmin, llrwk, llwork, llwrk2, lopt, lropt, lrwmin, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_clascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_chetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + indrwk = inde + n + indwk2 = indwrk + n*n + llwork = lwork - indwrk + 1 + llwrk2 = lwork - indwk2 + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_chetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_cstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_cunmtr to multiply it to the + ! householder transformations represented as householder vectors in + ! a. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + call stdlib_cstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, rwork( indrwk ), llrwk,iwork, liwork, info ) + call stdlib_cunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_clacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lopt + rwork( 1 ) = lropt + iwork( 1 ) = liopt + return + end subroutine stdlib_cheevd + + !> CHEGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_chegvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lropt, lrwmin, lwmin + ! Intrinsic Functions + intrinsic :: max,real + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else if( wantz ) then + lwmin = 2*n + n*n + lrwmin = 1 + 5*n + 2*n*n + liwmin = 3 + 5*n + else + lwmin = n + 1 + lrwmin = n + liwmin = 1 + end if + lopt = lwmin + lropt = lrwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda CHPEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_chpevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,rwork, lrwork, iwork, & + liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale, liwmin, llrwk, & + llwrk, lrwmin, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_csscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_chptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + indrwk = inde + n + indwrk = indtau + n + llwrk = lwork - indwrk + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_chptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_cupgtr to generate the orthogonal matrix, then call stdlib_cstedc. + if( .not.wantz ) then + call stdlib_ssterf( n, w, rwork( inde ), info ) + else + call stdlib_cstedc( 'I', n, w, rwork( inde ), z, ldz, work( indwrk ),llwrk, rwork( & + indrwk ), llrwk, iwork, liwork,info ) + call stdlib_cupmtr( 'L', uplo, 'N', n, n, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_chpevd + + !> CHPGVD: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_chpgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: rwork(*), w(*) + complex(sp), intent(inout) :: ap(*), bp(*) + complex(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: j, liwmin, lrwmin, lwmin, neig + ! Intrinsic Functions + intrinsic :: max,real + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_ctpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_ctpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_chpgvd + + !> CGEES: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_cgees( jobvs, sort, select, n, a, lda, sdim, w, vs,ldvs, work, lwork, & + rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_c) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantst, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, maxwrk, & + minwrk + real(sp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_clascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_cgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_cgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_clacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_clascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (cworkspace: none) + ! (rworkspace: none) + call stdlib_ctrsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,s, sep, work( & + iwrk ), lwork-iwrk+1, icond ) + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_cgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_clascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_ccopy( n, a, lda+1, w, 1 ) + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_cgees + + !> CGEESX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_sp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_cgeesx( jobvs, sort, select, sense, n, a, lda, sdim, w,vs, ldvs, rconde, & + rcondv, work, lwork, rwork,bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + real(sp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_c) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantsb, wantse, wantsn, wantst, wantsv, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, lwrk, & + maxwrk, minwrk + real(sp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_clascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_cgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_cgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_clacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_clascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (cworkspace: if sense is not 'n', need 2*sdim*(n-sdim) + ! otherwise, need none ) + ! (rworkspace: none) + call stdlib_ctrsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,icond ) + if( .not.wantsn )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( icond==-14 ) then + ! not enough complex workspace + info = -15 + end if + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_cgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_clascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_ccopy( n, a, lda+1, w, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_slascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_cgeesx + + !> CGEEV: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_cgeev( jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, irwork, itau, iwrk, k, lwork_trevc, & + maxwrk, minwrk, nout + real(sp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(sp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_clascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_cgebal( 'B', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_cgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_clacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_clacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_clacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'E', 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_chseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need 2*n) + irwork = ibal + n + call stdlib_ctrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork( irwork ), n, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_cgebak( 'B', 'L', n, ilo, ihi, rwork( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_scnrm2( n, vl( 1, i ), 1 ) + call stdlib_csscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vl( k, i ),KIND=sp)**2 +aimag( vl( k, i ) )& + **2 + end do + k = stdlib_isamax( n, rwork( irwork ), 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_cscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=sp), zero,KIND=sp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_cgebak( 'B', 'R', n, ilo, ihi, rwork( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_scnrm2( n, vr( 1, i ), 1 ) + call stdlib_csscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vr( k, i ),KIND=sp)**2 +aimag( vr( k, i ) )& + **2 + end do + k = stdlib_isamax( n, rwork( irwork ), 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_cscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=sp), zero,KIND=sp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_clascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info>0 ) then + call stdlib_clascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_cgeev + + !> CGEEVX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_sp of the LAPACK + !> Users' Guide. + + subroutine stdlib_cgeevx( balanc, jobvl, jobvr, sense, n, a, lda, w, vl,ldvl, vr, ldvr, ilo, & + ihi, scale, abnrm, rconde,rcondv, work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(sp), intent(out) :: abnrm + ! Array Arguments + real(sp), intent(out) :: rconde(*), rcondv(*), rwork(*), scale(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(sp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(sp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ) & + .or.stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) ) then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_clascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_cgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_clange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_slascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_cgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_clacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_clacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_clacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_cunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_chseqr( job, 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_chseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need n) + call stdlib_ctrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork, n, ierr ) + end if + ! compute condition numbers if desired + ! (cworkspace: need n*n+2*n unless sense = 'e') + ! (rworkspace: need 2*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_ctrsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, rwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_cgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_scnrm2( n, vl( 1, i ), 1 ) + call stdlib_csscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vl( k, i ),KIND=sp)**2 +aimag( vl( k, i ) )**2 + end do + k = stdlib_isamax( n, rwork, 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_cscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=sp), zero,KIND=sp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_cgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_scnrm2( n, vr( 1, i ), 1 ) + call stdlib_csscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vr( k, i ),KIND=sp)**2 +aimag( vr( k, i ) )**2 + end do + k = stdlib_isamax( n, rwork, 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_cscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=sp), zero,KIND=sp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_clascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_slascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_clascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_cgeevx + + !> CGEJSV: computes the singular value decomposition (SVD) of a complex M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^*, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and + !> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + + pure subroutine stdlib_cgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) + real(sp), intent(out) :: sva(n), rwork(lrwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + + ! Local Scalars + complex(sp) :: ctemp + real(sp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & + l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + integer(ilp) :: optwrk, minwrk, minrwrk, miniwrk + integer(ilp) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & + lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff + integer(ilp) :: lwrk_cgelqf, lwrk_cgeqp3, lwrk_cgeqp3n, lwrk_cgeqrf, lwrk_cgesvj, & + lwrk_cgesvjv, lwrk_cgesvju, lwrk_cunmlq, lwrk_cunmqr, lwrk_cunmqrm + ! Local Arrays + complex(sp) :: cdummy(1) + real(sp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,log,max,min,real,nint,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & + .and. rsvec .and. l2tran ) ) ) then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & + .and. lsvec .and. l2tran ) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else + ! #:) + info = 0 + end if + if ( info == 0 ) then + ! Compute The Minimal And The Optimal Workspace Lengths + ! [[the expressions for computing the minimal and the optimal + ! values of lcwork, lrwork are written with a lot of redundancy and + ! can be simplified. however, this verbose form is useful for + ! maintenance and modifications of the code.]] + ! .. minimal workspace length for stdlib_cgeqp3 of an m x n matrix, + ! stdlib_cgeqrf of an n x n matrix, stdlib_cgelqf of an n x n matrix, + ! stdlib_cunmlq for computing n x n matrix, stdlib_cunmqr for computing n x n + ! matrix, stdlib_cunmqr for computing m x n matrix, respectively. + lwqp3 = n+1 + lwqrf = max( 1, n ) + lwlqf = max( 1, n ) + lwunmlq = max( 1, n ) + lwunmqr = max( 1, n ) + lwunmqrm = max( 1, m ) + ! Minimal Workspace Length For Stdlib_Cpocon Of An N X N Matrix + lwcon = 2 * n + ! .. minimal workspace length for stdlib_cgesvj of an n x n matrix, + ! without and with explicit accumulation of jacobi rotations + lwsvdj = max( 2 * n, 1 ) + lwsvdjv = max( 2 * n, 1 ) + ! .. minimal real workspace length for stdlib_cgeqp3, stdlib_cpocon, stdlib_cgesvj + lrwqp3 = 2 * n + lrwcon = n + lrwsvdj = n + if ( lquery ) then + call stdlib_cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,rdummy, ierr ) + + lwrk_cgeqp3 = real( cdummy(1),KIND=sp) + call stdlib_cgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_cgeqrf = real( cdummy(1),KIND=sp) + call stdlib_cgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_cgelqf = real( cdummy(1),KIND=sp) + end if + minwrk = 2 + optwrk = 2 + miniwrk = n + if ( .not. (lsvec .or. rsvec ) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If + ! only the singular values are requested + if ( errest ) then + minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj ) + else + minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) + end if + if ( lquery ) then + call stdlib_cgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1,& + rdummy, -1, ierr ) + lwrk_cgesvj = real( cdummy(1),KIND=sp) + if ( errest ) then + optwrk = max( n+lwrk_cgeqp3, n**2+lwcon,n+lwrk_cgeqrf, lwrk_cgesvj ) + + else + optwrk = max( n+lwrk_cgeqp3, n+lwrk_cgeqrf,lwrk_cgesvj ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( rsvec .and. (.not.lsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the right singular vectors are requested + if ( errest ) then + minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2*n+lwqrf, n+lwsvdj, n+& + lwunmlq ) + else + minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,n+lwsvdj, n+lwunmlq ) + + end if + if ( lquery ) then + call stdlib_cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_cgesvj = real( cdummy(1),KIND=sp) + call stdlib_cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1, & + ierr ) + lwrk_cunmlq = real( cdummy(1),KIND=sp) + if ( errest ) then + optwrk = max( n+lwrk_cgeqp3, lwcon, lwrk_cgesvj,n+lwrk_cgelqf, 2*n+& + lwrk_cgeqrf,n+lwrk_cgesvj, n+lwrk_cunmlq ) + else + optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvj,n+lwrk_cgelqf,2*n+lwrk_cgeqrf, n+& + lwrk_cgesvj,n+lwrk_cunmlq ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( lsvec .and. (.not.rsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the left singular vectors are requested + if ( errest ) then + minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) + else + minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) + end if + if ( lquery ) then + call stdlib_cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_cgesvj = real( cdummy(1),KIND=sp) + call stdlib_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_cunmqrm = real( cdummy(1),KIND=sp) + if ( errest ) then + optwrk = n + max( lwrk_cgeqp3, lwcon, n+lwrk_cgeqrf,lwrk_cgesvj, & + lwrk_cunmqrm ) + else + optwrk = n + max( lwrk_cgeqp3, n+lwrk_cgeqrf,lwrk_cgesvj, lwrk_cunmqrm ) + + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! full svd is requested + if ( .not. jracc ) then + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, n+& + lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, & + n+lwunmqrm ) + end if + miniwrk = miniwrk + n + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+& + lwunmqr,n+lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,n+& + lwunmqrm ) + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + end if + if ( lquery ) then + call stdlib_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_cunmqrm = real( cdummy(1),KIND=sp) + call stdlib_cunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_cunmqr = real( cdummy(1),KIND=sp) + if ( .not. jracc ) then + call stdlib_cgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,rdummy, ierr ) + + lwrk_cgeqp3n = real( cdummy(1),KIND=sp) + call stdlib_cgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_cgesvj = real( cdummy(1),KIND=sp) + call stdlib_cgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_cgesvju = real( cdummy(1),KIND=sp) + call stdlib_cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_cgesvjv = real( cdummy(1),KIND=sp) + call stdlib_cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& + 1, ierr ) + lwrk_cunmlq = real( cdummy(1),KIND=sp) + if ( errest ) then + optwrk = max( n+lwrk_cgeqp3, n+lwcon,2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,& + 2*n+lwrk_cgeqp3n,2*n+n**2+n+lwrk_cgelqf,2*n+n**2+n+n**2+lwcon,2*n+& + n**2+n+lwrk_cgesvj,2*n+n**2+n+lwrk_cgesvjv,2*n+n**2+n+lwrk_cunmqr,2*n+& + n**2+n+lwrk_cunmlq,n+n**2+lwrk_cgesvju,n+lwrk_cunmqrm ) + else + optwrk = max( n+lwrk_cgeqp3,2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,2*n+& + lwrk_cgeqp3n,2*n+n**2+n+lwrk_cgelqf,2*n+n**2+n+n**2+lwcon,2*n+n**2+n+& + lwrk_cgesvj,2*n+n**2+n+lwrk_cgesvjv,2*n+n**2+n+lwrk_cunmqr,2*n+n**2+n+& + lwrk_cunmlq,n+n**2+lwrk_cgesvju,n+lwrk_cunmqrm ) + end if + else + call stdlib_cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_cgesvjv = real( cdummy(1),KIND=sp) + call stdlib_cunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& + -1, ierr ) + lwrk_cunmqr = real( cdummy(1),KIND=sp) + call stdlib_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& + 1, ierr ) + lwrk_cunmqrm = real( cdummy(1),KIND=sp) + if ( errest ) then + optwrk = max( n+lwrk_cgeqp3, n+lwcon,2*n+lwrk_cgeqrf, 2*n+n**2,2*n+& + n**2+lwrk_cgesvjv,2*n+n**2+n+lwrk_cunmqr,n+lwrk_cunmqrm ) + else + optwrk = max( n+lwrk_cgeqp3, 2*n+lwrk_cgeqrf,2*n+n**2, 2*n+n**2+& + lwrk_cgesvjv,2*n+n**2+n+lwrk_cunmqr,n+lwrk_cunmqrm ) + end if + end if + end if + if ( l2tran .or. rowpiv ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + end if + end if + minwrk = max( 2, minwrk ) + optwrk = max( optwrk, minwrk ) + if ( lwork < minwrk .and. (.not.lquery) ) info = - 17 + if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19 + end if + if ( info /= 0 ) then + ! #:( + call stdlib_xerbla( 'CGEJSV', - info ) + return + else if ( lquery ) then + cwork(1) = optwrk + cwork(2) = minwrk + rwork(1) = minrwrk + iwork(1) = max( 4, miniwrk ) + return + end if + ! quick return for void matrix (y3k safe) + ! #:) + if ( ( m == 0 ) .or. ( n == 0 ) ) then + iwork(1:4) = 0 + rwork(1:7) = 0 + return + endif + ! determine whether the matrix u should be m x n or m x m + if ( lsvec ) then + n1 = n + if ( stdlib_lsame( jobu, 'F' ) ) n1 = m + end if + ! set numerical parameters + ! ! note: make sure stdlib_slamch() does not fail on the target architecture. + epsln = stdlib_slamch('EPSILON') + sfmin = stdlib_slamch('SAFEMINIMUM') + small = sfmin / epsln + big = stdlib_slamch('O') + ! big = one / sfmin + ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n + ! (!) if necessary, scale sva() to protect the largest norm from + ! overflow. it is possible that this scaling pushes the smallest + ! column norm left from the underflow threshold (extreme case). + scalem = one / sqrt(real(m,KIND=sp)*real(n,KIND=sp)) + noscal = .true. + goscal = .true. + do p = 1, n + aapp = zero + aaqq = one + call stdlib_classq( m, a(1,p), 1, aapp, aaqq ) + if ( aapp > big ) then + info = - 9 + call stdlib_xerbla( 'CGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_sscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_claset( 'G', m, n1, czero, cone, u, ldu ) + if ( rsvec ) call stdlib_claset( 'G', n, n, czero, cone, v, ldv ) + rwork(1) = one + rwork(2) = one + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + iwork(4) = -1 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill nonzero columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_clascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_clacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_cgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) + call stdlib_cungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) + call stdlib_ccopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = cone + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + rwork(1) = one / scalem + rwork(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + iwork(4) = -1 + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + return + end if + transp = .false. + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^* and a^* * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_classq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_classq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + rwork(m+p) = xsc * scalem + rwork(p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, rwork(p) ) + if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) + end do + else + do p = 1, m + rwork(m+p) = scalem*abs( a(p,stdlib_icamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, rwork(m+p) ) + aatmin = min( aatmin, rwork(m+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^* would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_slassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=sp)) + ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^* * a. do the same with the + ! diagonal of a * a^*, compute the entropy of the corresponding + ! probability distribution. note that a * a^* and a^* * a have the + ! same trace. + entrat = zero + do p = 1, m + big1 = ( ( rwork(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=sp)) + ! analyze the entropies and decide a or a^*. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^* is better than a, take the adjoint of a. this is allowed + ! only for square matrices, m=n. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + ctemp = conjg(a(q,p)) + a(q,p) = conjg(a(p,q)) + a(p,q) = ctemp + end do + end do + a(n,n) = conjg(a(n,n)) + do p = 1, n + rwork(m+p) = sva(p) + sva(p) = rwork(p) + ! previously computed row 2-norms are now column 2-norms + ! of the transposed matrix + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_cgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_cgesvj will compute them. so, in that case, + ! one should use stdlib_cgesvj instead of stdlib_cgejsv. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=sp) ) + ! >> for future updates: allow bigger range, i.e. the largest column + ! will be allowed up to big/n and stdlib_cgesvj will do the rest. however, for + ! this all other (lapack) components must allow such a range. + ! temp1 = big/real(n,KIND=sp) + ! temp1 = big * epsln this should 'almost' work with current lapack components + call stdlib_slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_clascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_cgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=sp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_clacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_csscal( p, one/temp1, v(1,p), 1 ) + end do + if ( lsvec )then + call stdlib_cpocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_cpocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) + + end if + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_clacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_csscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_cpocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_clacpy( 'U', n, n, a, lda, cwork, n ) + ! [] call stdlib_clacpy( 'u', n, n, a, lda, cwork(n+1), n ) + ! change: here index shifted by n to the left, cwork(1:n) + ! not needed for sigma only computation + do p = 1, n + temp1 = sva(iwork(p)) + ! [] call stdlib_csscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) + call stdlib_csscal( p, one/temp1, cwork((p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + ! [] call stdlib_cpocon( 'u', n, cwork(n+1), n, one, temp1, + ! [] $ cwork(n+n*n+1), rwork, ierr ) + call stdlib_cpocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) + + end if + if ( temp1 /= zero ) then + sconda = one / sqrt(temp1) + else + sconda = - one + end if + ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_ccopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_clacgv( n-p+1, a(p,p), 1 ) + end do + if ( nr == n ) a(n,n) = conjg(a(n,n)) + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=sp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=sp) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_claset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_cgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_ccopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_clacgv( nr-p+1, a(p,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=sp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=sp) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_claset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_cgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & + .not. lsvec ) .and. ( nr /= n ) ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_ccopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_clacgv( n-p+1, v(p,p), 1 ) + end do + call stdlib_claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_cgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_claset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda ) + call stdlib_cgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) + call stdlib_clacpy( 'L', nr, nr, a, lda, v, ldv ) + call stdlib_claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_cgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_ccopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + call stdlib_clacgv( nr-p+1, v(p,p), 1 ) + end do + call stdlib_claset('U', nr-1, nr-1, czero, czero, v(1,2), ldv) + call stdlib_cgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & + lwork-n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_claset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv ) + call stdlib_claset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv ) + call stdlib_claset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) + end if + call stdlib_cunmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & + ierr ) + end if + ! Permute The Rows Of V + ! do 8991 p = 1, n + ! call stdlib_ccopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + 8991 continue + ! call stdlib_clacpy( 'all', n, n, a, lda, v, ldv ) + call stdlib_clapmr( .false., n, n, v, ldv, iwork ) + if ( transp ) then + call stdlib_clacpy( 'A', n, n, v, ldv, u, ldu ) + end if + else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then + call stdlib_claset( 'L', n-1,n-1, czero, czero, a(2,1), lda ) + call stdlib_cgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & + lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + call stdlib_clapmr( .false., n, n, v, ldv, iwork ) + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_ccopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + call stdlib_clacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_cgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr - 1 + call stdlib_ccopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + call stdlib_clacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_cgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& + n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < m ) then + call stdlib_claset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_claset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu ) + call stdlib_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_scnrm2( m, u(1,p), 1 ) + call stdlib_csscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_clacpy( 'A', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_cgejsv. + do p = 1, nr + call stdlib_ccopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_clacgv( n-p+1, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=sp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_clacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_scnrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1) + call stdlib_csscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1) + end do + call stdlib_cpocon('L',nr,cwork(2*n+1),nr,one,temp1,cwork(2*n+nr*nr+1),rwork,& + ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=sp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=sp)) + cond_ok = sqrt(sqrt(real(nr,KIND=sp))) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^* = q2 * r2 + call stdlib_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + if ( nr /= n )call stdlib_clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_ccopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + call stdlib_clacgv(nr-p+1, v(p,p), 1 ) + end do + v(nr,nr)=conjg(v(nr,nr)) + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_cgeqp3 + ! should be replaced with eg. call cgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^* * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_cgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2*n+1), lwork-& + 2*n, rwork, ierr ) + ! * call stdlib_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + call stdlib_clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) + ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_claset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_cgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),cwork(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_clacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_scnrm2( p, cwork(2*n+n*nr+nr+p), nr ) + call stdlib_csscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr ) + end do + call stdlib_cpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,cwork(2*n+n*nr+& + nr+nr*nr+1),rwork,ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_clacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + ctemp = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_cgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_ccopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_csscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_cgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_ctrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) + else + ! .. r1 is well conditioned, but non-square. adjoint of r2 + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_cgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_ctrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_cunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(& + 2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^* * v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_cgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_ccopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_csscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_ctrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_cgejsv completes the task. + ! compute the full svd of l3 using stdlib_cgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_cgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_cunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,cwork(2*n+n*nr+1), & + u, ldu, cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=sp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_scnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_csscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_claset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_claset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=sp)) * epsln + do p = 1, nr + xsc = one / stdlib_scnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_csscal( m, xsc,& + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_clacpy( 'U', n, n, a, lda, cwork(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + ctemp = xsc * cwork( n + (p-1)*n + p ) + do q = 1, p - 1 + ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / + ! $ abs(cwork(n+(p-1)*n+q)) ) + cwork(n+(q-1)*n+p)=-ctemp + end do + end do + else + call stdlib_claset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) + end if + call stdlib_cgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& + n*n+1), lwork-n-n*n, rwork, lrwork,info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, n + call stdlib_ccopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_csscal( n, sva(p), cwork(n+(p-1)*n+1), 1 ) + end do + call stdlib_ctrsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) + do p = 1, n + call stdlib_ccopy( n, cwork(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=sp))*epsln + do p = 1, n + xsc = one / stdlib_scnrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_csscal( n, xsc,& + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_claset( 'A', m-n, n, czero, czero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_claset('A',n, n1-n, czero, czero, u(1,n+1),ldu) + call stdlib_claset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) + end if + end if + call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + temp1 = sqrt(real(m,KIND=sp))*epsln + do p = 1, n1 + xsc = one / stdlib_scnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_csscal( m, xsc,& + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values, e.g. when the singular values spread from + ! the underflow to the overflow threshold. + do p = 1, nr + call stdlib_ccopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_clacgv( n-p+1, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=sp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + call stdlib_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + call stdlib_clacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n ) + do p = 1, nr + call stdlib_ccopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + call stdlib_clacgv( nr-p+1, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=sp) + ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) + u(p,q) = - ctemp + end do + end do + else + call stdlib_claset('U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + end if + call stdlib_cgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2*n+n*nr+1),& + lwork-2*n-n*nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_claset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) + end if + call stdlib_cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+n*nr+& + nr+1),lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=sp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_scnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_csscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_claset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_claset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu) + call stdlib_claset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) + end if + end if + call stdlib_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^* + do p = 1, n + call stdlib_cswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + rwork(1) = uscal2 * scalem + rwork(2) = uscal1 + if ( errest ) rwork(3) = sconda + if ( lsvec .and. rsvec ) then + rwork(4) = condr1 + rwork(5) = condr2 + end if + if ( l2tran ) then + rwork(6) = entra + rwork(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + if ( transp ) then + iwork(4) = 1 + else + iwork(4) = -1 + end if + return + end subroutine stdlib_cgejsv + + !> CGESVJ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + pure subroutine stdlib_cgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & + rwork, lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) + real(sp), intent(inout) :: rwork(lrwork) + real(sp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + + ! Local Scalars + complex(sp) :: aapq, ompq + real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & + theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Intrinsic Functions + intrinsic :: abs,max,min,conjg,real,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = rwork( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=sp) ) + else + ctol = real( m,KIND=sp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_slamch() works properly on the target machine.) + epsln = stdlib_slamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_slamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + ! big = stdlib_slamch( 'overflow' ) + big = one / sfmin + rootbig = one / rootsfmin + ! large = big / sqrt( real( m*n,KIND=sp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=sp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'CGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_claset( 'A', mvl, n, czero, cone, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl = one / sqrt( real( m,KIND=sp)*real( n,KIND=sp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_classq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'CGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_classq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'CGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_classq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'CGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl = one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_claset( 'G', m, n, czero, cone, a, lda ) + rwork( 1 ) = one + rwork( 2 ) = zero + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_clascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + rwork( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + rwork( 2 ) = one + else + rwork( 2 ) = zero + end if + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=sp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=sp) ) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=sp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_slascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl = temp1*skl + if( skl/=one ) then + call stdlib_clascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl = one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + do q = 1, n + cwork( q ) = cone + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_cgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_cgejsv. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_cgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_cgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_cgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & + sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_cgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_cgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, cwork( n+1 ), lwork-n,ierr ) + call stdlib_cgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_cgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, cwork( n+1 ), lwork-n,ierr ) + call stdlib_cgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,ierr ) + + call stdlib_cgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + call stdlib_cgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& + , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_cswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_cswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = cwork(p) + cwork(p) = cwork(q) + cwork(q) = aapq + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! [!] caveat: + ! unfortunately, some blas implementations compute stdlib_scnrm2(m,a(1,p),1) + ! as sqrt(s=stdlib_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_scnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_scnrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_scnrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_ccopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1), & + lda, ierr ) + aapq = stdlib_cdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_ccopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_cdotc( m, a(1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_ccopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one, m,1, cwork(n+1), & + lda,ierr ) + call stdlib_clascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_caxpy( m, -aapq, cwork(n+1), 1,a( 1, q ), 1 ) + + call stdlib_clascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_scnrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_classq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_ccopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aapp,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_cdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_ccopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_cdotc( m, a( 1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_ccopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_caxpy( m, -aapq, cwork(n+1),1, a( 1, q ), 1 ) + + call stdlib_clascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_ccopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_caxpy( m, -conjg(aapq),cwork(n+1), 1, a( 1, & + p ), 1 ) + call stdlib_clascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_scnrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_classq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_scnrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + call stdlib_cswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_cswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n4 + ! call stdlib_csscal( m, one / sva( p ), a( 1, p ), 1 ) + call stdlib_clascl( 'G',0,0, sva(p), one, m, 1, a(1,p), m, ierr ) + end do + end if + ! scale the product of jacobi rotations. + if( rsvec ) then + do p = 1, n + temp1 = one / stdlib_scnrm2( mvl, v( 1, p ), 1 ) + call stdlib_csscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl ) ) ).or. ( ( skl( sfmin / skl ) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl = one + end if + rwork( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + rwork( 2 ) = real( n4,KIND=sp) + ! n4 is the number of computed nonzero singular values of a. + rwork( 3 ) = real( n2,KIND=sp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 CGGES3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> CGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_cgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr,work, lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_c) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_cggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_cungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_claset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_cgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk, ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_claqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_ctgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_cggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_cggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_clascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_clascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_clascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_clascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = cmplx( lwkopt,KIND=sp) + return + end subroutine stdlib_cgges3 + + !> CGGEV3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_cggev3( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(out) :: rwork(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(sp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,aimag,max,real,sqrt + ! Statement Functions + real(sp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_clascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_clange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_clascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_cggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_cgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_cunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_claset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_clacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_cungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_claset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_cgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk,ierr ) + else + call stdlib_cgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_claqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ctgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_cggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp CGSVJ0: is called from CGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_cgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(sp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(sp), intent(out) :: work(lwork) + real(sp), intent(inout) :: sva(n) + ! ===================================================================== + + + ! Local Scalars + complex(sp) :: aapq, ompq + real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,max,conjg,real,min,sign,sqrt + ! from lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_scnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_scnrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_scnrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, work, lda, & + ierr ) + aapq = stdlib_cdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_ccopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_cdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one, m,1, work, lda,& + ierr ) + call stdlib_clascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_caxpy( m, -aapq, work, 1,a( 1, q ), 1 ) + call stdlib_clascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_scnrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_classq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_cdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_ccopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_cdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_caxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_clascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_ccopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_caxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_clascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_scnrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_classq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_scnrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_cswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_cswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_cgsvj0 + + !> CGSVJ1: is called from CGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> CGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_cgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(sp), intent(out) :: work(lwork) + real(sp), intent(inout) :: sva(n) + ! ===================================================================== + + ! Local Scalars + complex(sp) :: aapq, ompq + real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,max,conjg,real,min,sign,sqrt + ! From Lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_cdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_cdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_ccopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_cdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_crot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_crot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_ccopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_caxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_clascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_ccopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_clascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_clascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_caxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_clascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_scnrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_classq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_scnrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_scnrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_classq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_cswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_cswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_cgsvj1 + + !> CHESV_AA: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**H * T * U, if UPLO = 'U', or + !> A = L * T * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is Hermitian and tridiagonal. The factored form + !> of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_chesv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_hetrf, lwkopt_hetrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CHETRF_AA: computes the factorization of a complex hermitian matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**H*T*U or A = L*T*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a hermitian tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_chetrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'CHETRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_clahef_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_cswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j, j+1 ) ) + a( j, j+1 ) = cone + call stdlib_ccopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_cgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',1, mj, jb+1,-cone,& + a( j1-k2, j3 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j3, j3 ), lda ) + + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_cgemm + call stdlib_cgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-& + cone, a( j1-k2, j2 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j2, j3 ), lda & + ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**h using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_clahef; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_clahef_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_cswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j+1, j ) ) + a( j+1, j ) = cone + call stdlib_ccopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_cgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',mj, 1, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), & + lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block column with stdlib_cgemm + call stdlib_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j3+1, nj, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda & + ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_chetrf_aa + + !> CHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + + pure subroutine stdlib_chseqr( job, compz, n, ilo, ihi, h, ldh, w, z, ldz,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + real(sp), parameter :: rzero = 0.0_sp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_clahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_clahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + + ! Local Arrays + complex(sp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: cmplx,max,min,real + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = cmplx( real( max( 1, n ),KIND=sp), rzero,KIND=sp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldh1 )call stdlib_ccopy( ilo-1, h, ldh+1, w, 1 ) + if( ihinmin ) then + call stdlib_claqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, work, & + lwork, info ) + else + ! ==== small matrix ==== + call stdlib_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, info ) + + if( info>0 ) then + ! ==== a rare stdlib_clahqr failure! stdlib_claqr0 sometimes succeeds + ! . when stdlib_clahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_claqr0 directly. ==== + call stdlib_claqr0( wantt, wantz, n, ilo, kbot, h, ldh, w,ilo, ihi, z, ldz,& + work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_claqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_claqr0. ==== + call stdlib_clacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = czero + call stdlib_claset( 'A', nl, nl-n, czero, czero, hl( 1, n+1 ),nl ) + call stdlib_claqr0( wantt, wantz, nl, ilo, kbot, hl, nl, w,ilo, ihi, z, & + ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_clacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_claset( 'L', n-2, n-2, czero, & + czero, h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = cmplx( max( real( max( 1, n ),KIND=sp),real( work( 1 ),KIND=sp) ), & + rzero,KIND=sp) + end if + end subroutine stdlib_chseqr + + !> CLAHEF_AA: factorizes a panel of a complex hermitian matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_clahef_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), h(ldh,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(sp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_chetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j, j:n) - h(j:n, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:n, j) has been initialized to be a(j, j:n) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_clacgv( j-k1, a( 1, j ), 1 ) + call stdlib_cgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + call stdlib_clacgv( j-k1, a( 1, j ), 1 ) + end if + ! copy h(i:n, i) into work + call stdlib_ccopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:n) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:n) stores u(j-1, j:n) + alpha = -conjg( a( k-1, j ) ) + call stdlib_caxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = real( work( 1 ),KIND=sp) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_caxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_icamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:n) with a(i1+1:n, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_cswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + call stdlib_clacgv( i2-i1, a( j1+i1-1, i1+1 ), lda ) + call stdlib_clacgv( i2-i1-1, a( j1+i1, i2 ), 1 ) + ! swap a(i1, i2+1:n) with a(i2, i2+1:n) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_cswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_chetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j:n, j) - h(j:n, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:n, j) has been initialized to be a(j:n, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_clacgv( j-k1, a( j, 1 ), lda ) + call stdlib_cgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + call stdlib_clacgv( j-k1, a( j, 1 ), lda ) + end if + ! copy h(j:n, j) into work + call stdlib_ccopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:n, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -conjg( a( j, k-1 ) ) + call stdlib_caxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = real( work( 1 ),KIND=sp) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_caxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_icamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:n, i1) with a(i2, i1+1:n) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_cswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + call stdlib_clacgv( i2-i1, a( i1+1, j1+i1-1 ), 1 ) + call stdlib_clacgv( i2-i1-1, a( i2, j1+i1 ), lda ) + ! swap a(i2+1:n, i1) with a(i2+1:n, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_cswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j CLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_claqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(sp), parameter :: wilk1 = 0.75_sp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_clahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(sp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(sp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,int,max,min,mod,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_clahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_claqr3 ==== + call stdlib_claqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_claqr5, stdlib_claqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + return + end if + ! ==== stdlib_clahqr/stdlib_claqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_claqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_claqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_claqr4 or + ! . stdlib_clahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_claqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, work, lwork, inf ) + else + call stdlib_clahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + end subroutine stdlib_claqr0 + + !> Aggressive early deflation: + !> CLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(sp), parameter :: rzero = 0.0_sp + real(sp), parameter :: rone = 1.0_sp + + + ! Local Scalars + complex(sp) :: beta, cdum, s, tau + real(sp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwk3, lwkopt, nmin + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,conjg,int,max,min,real + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_cgehrd ==== + call stdlib_cgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_cunmhr ==== + call stdlib_cunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_claqr4 ==== + call stdlib_claqr4( .true., .true., jw, 1, jw, t, ldt, sh, 1, jw, v,ldv, work, -1, & + infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_claset( 'A', jw, jw, czero, cone, v, ldv ) + nmin = stdlib_ilaenv( 12, 'CLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_claqr4( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + work, lwork, infqr ) + else + call stdlib_clahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + end if + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_ctrexc can not fail in this case.) ==== + ifst = ns + call stdlib_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_ccopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_clarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_claset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_clarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_clarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_clarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_cgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_ccopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_cunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + end subroutine stdlib_claqr3 + + !> CLAQR4: implements one level of recursion for CLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by CLAQR0 and, for large enough + !> deflation window size, it may be called by CLAQR3. This + !> subroutine is identical to CLAQR0 except that it calls CLAQR2 + !> instead of CLAQR3. + !> CLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_claqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(sp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(sp), parameter :: wilk1 = 0.75_sp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_clahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(sp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(sp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,aimag,cmplx,int,max,min,mod,real,sqrt + ! Statement Functions + real(sp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_clahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_claqr2 ==== + call stdlib_claqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_claqr5, stdlib_claqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + return + end if + ! ==== stdlib_clahqr/stdlib_claqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_claqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_claqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_clahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_clahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( ks )& + , 1, 1, zdum,1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=sp) + end subroutine stdlib_claqr4 + + !> CLAQZ0: computes the eigenvalues of a matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by CGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices, P and S are an upper triangular + !> matrices. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from CGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the unitary factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_claqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & + beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), & + alpha( * ), beta( * ), work( * ) + real(sp), intent( out ) :: rwork( * ) + + + ! local scalars + real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr + complex(sp) :: eshift, s1, temp + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_chgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& + ldq, z, ldz, work, lwork, rwork,info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_claqz2 + nw = max( nwr, nmin ) + call stdlib_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1, rwork, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_claqz3 + call stdlib_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & + ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=sp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_claset( 'FULL', n, n, czero, cone, q,ldq ) + if( iwantz==3 ) call stdlib_claset( 'FULL', n, n, czero, cone, z,ldz ) + ! get machine constants + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp)/ulp ) + istart = ilo + istop = ihi + maxit = 30*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & + a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = czero + istop = istop-1 + ld = 0 + eshift = czero + end if + ! check deflations at the start + if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& + abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = czero + istart = istart+1 + ld = 0 + eshift = czero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = czero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + tempr = zero + if( k < istop ) then + tempr = tempr+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + tempr = tempr+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_clartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = czero + call stdlib_crot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_crot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_crot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = czero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using stdlib_chgeqz because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_claqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2+1 ), nw,work( & + 2*nw**2+1 ), lwork-2*nw**2, rwork, rec,aed_info ) + if ( n_deflated > 0 ) then + istop = istop-n_deflated + ld = 0 + eshift = czero + end if + if ( 100*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_deflated-n_undeflated+1 + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) ) CLAQZ2: performs AED + + recursive subroutine stdlib_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), & + alpha( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + complex(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + complex(sp), intent(out) :: work(*) + real(sp), intent(out) :: rwork(*) + + + ! local scalars + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ctgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr + complex(sp) :: s, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = czero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_claqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1, rwork, rec+1, qz_small_info ) + lworkreq = int( work( 1 ),KIND=ilp)+2*jw**2 + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'CLAQZ2', -info ) + return + end if + ! get machine constants + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alpha( kwtop ) = a( kwtop, kwtop ) + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = czero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_clacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_clacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_claset( 'FULL', jw, jw, czero, cone, qc, ldqc ) + call stdlib_claset( 'FULL', jw, jw, czero, cone, zc, ldzc ) + call stdlib_claqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2*jw**2+1 ), lwork-2*jw**2, rwork,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_clacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_clacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == czero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + ! try to deflate eigenvalue + tempr = abs( a( kwbot, kwbot ) ) + if( tempr == zero ) then + tempr = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_ctgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ctgexc_info ) + k2 = k2+1 + end if + k = k+1 + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + k = k+1 + end do + if ( kwtop /= ilo .and. s /= czero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1,1:jw-nd ) ) + do k = kwbot-1, kwtop, -1 + call stdlib_clartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = czero + k2 = max( kwtop, k-1 ) + call stdlib_crot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_crot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_crot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, conjg( & + s1 ) ) + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + ! move bulge down and remove it + do k2 = k, kwbot-1 + call stdlib_claqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & + ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) + end do + k = k-1 + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & + lda, czero, work, jw ) + call stdlib_clacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, czero, work, jw ) + call stdlib_clacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_cgemm( 'N', 'N', n, jw, jw, cone, q( 1, kwtop ), ldq, qc,ldqc, czero, & + work, n ) + call stdlib_clacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& + , lda ) + call stdlib_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& + , ldb ) + end if + if ( ilz ) then + call stdlib_cgemm( 'N', 'N', n, jw, jw, cone, z( 1, kwtop ), ldz, zc,ldzc, czero, & + work, n ) + call stdlib_clacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_claqz2 + + !> DLATRF_AA factorizes a panel of a complex symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_clasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), h(ldh,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(sp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_csytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j, j:m) - h(j:m, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:m, j) has been initialized to be a(j, j:m) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_cgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + end if + ! copy h(i:m, i) into work + call stdlib_ccopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:m) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:m) stores u(j-1, j:m) + alpha = -a( k-1, j ) + call stdlib_caxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = work( 1 ) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_caxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_icamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:m) with a(i1+1:m, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_cswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + ! swap a(i1, i2+1:m) with a(i2, i2+1:m) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_cswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_csytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j:m, j) - h(j:m, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:m, j) has been initialized to be a(j:m, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_cgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + end if + ! copy h(j:m, j) into work + call stdlib_ccopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:m, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -a( j, k-1 ) + call stdlib_caxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = work( 1 ) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_caxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_icamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:m, i1) with a(i2, i1+1:m) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_cswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + ! swap a(i2+1:m, i1) with a(i2+1:m, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_cswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j CSYSV computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_csysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*), b(ldb,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda CSYTRF_AA: computes the factorization of a complex symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a complex symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_csytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(sp), intent(inout) :: a(lda,*) + complex(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'CSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_clasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_cswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = cone + call stdlib_ccopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_cgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_cgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j1-k2, j3 ), 1,cone, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_cgemm + call stdlib_cgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-cone, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,cone, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_clasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_clasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_cswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = cone + call stdlib_ccopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_cgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_cgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_cgemm + call stdlib_cgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-cone, & + work( j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda ) + + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_csytrf_aa + + + +end module stdlib_linalg_lapack_c diff --git a/src/stdlib_linalg_lapack_d.fypp b/src/stdlib_linalg_lapack_d.fypp new file mode 100644 index 000000000..e1eed9a92 --- /dev/null +++ b/src/stdlib_linalg_lapack_d.fypp @@ -0,0 +1,85276 @@ +#:include "common.fypp" +module stdlib_linalg_lapack_d + use stdlib_linalg_constants + use stdlib_linalg_blas + use stdlib_linalg_lapack_aux + use stdlib_linalg_lapack_s + use stdlib_linalg_lapack_c + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_dbbcsd + public :: stdlib_dbdsdc + public :: stdlib_dbdsqr + public :: stdlib_ddisna + public :: stdlib_dgbbrd + public :: stdlib_dgbcon + public :: stdlib_dgbequ + public :: stdlib_dgbequb + public :: stdlib_dgbrfs + public :: stdlib_dgbsv + public :: stdlib_dgbsvx + public :: stdlib_dgbtf2 + public :: stdlib_dgbtrf + public :: stdlib_dgbtrs + public :: stdlib_dgebak + public :: stdlib_dgebal + public :: stdlib_dgebd2 + public :: stdlib_dgebrd + public :: stdlib_dgecon + public :: stdlib_dgeequ + public :: stdlib_dgeequb + public :: stdlib_dgees + public :: stdlib_dgeesx + public :: stdlib_dgeev + public :: stdlib_dgeevx + public :: stdlib_dgehd2 + public :: stdlib_dgehrd + public :: stdlib_dgejsv + public :: stdlib_dgelq + public :: stdlib_dgelq2 + public :: stdlib_dgelqf + public :: stdlib_dgelqt + public :: stdlib_dgelqt3 + public :: stdlib_dgels + public :: stdlib_dgelsd + public :: stdlib_dgelss + public :: stdlib_dgelsy + public :: stdlib_dgemlq + public :: stdlib_dgemlqt + public :: stdlib_dgemqr + public :: stdlib_dgemqrt + public :: stdlib_dgeql2 + public :: stdlib_dgeqlf + public :: stdlib_dgeqp3 + public :: stdlib_dgeqr + public :: stdlib_dgeqr2 + public :: stdlib_dgeqr2p + public :: stdlib_dgeqrf + public :: stdlib_dgeqrfp + public :: stdlib_dgeqrt + public :: stdlib_dgeqrt2 + public :: stdlib_dgeqrt3 + public :: stdlib_dgerfs + public :: stdlib_dgerq2 + public :: stdlib_dgerqf + public :: stdlib_dgesc2 + public :: stdlib_dgesdd + public :: stdlib_dgesv + public :: stdlib_dgesvd + public :: stdlib_dgesvdq + public :: stdlib_dgesvj + public :: stdlib_dgesvx + public :: stdlib_dgetc2 + public :: stdlib_dgetf2 + public :: stdlib_dgetrf + public :: stdlib_dgetrf2 + public :: stdlib_dgetri + public :: stdlib_dgetrs + public :: stdlib_dgetsls + public :: stdlib_dgetsqrhrt + public :: stdlib_dggbak + public :: stdlib_dggbal + public :: stdlib_dgges + public :: stdlib_dgges3 + public :: stdlib_dggesx + public :: stdlib_dggev + public :: stdlib_dggev3 + public :: stdlib_dggevx + public :: stdlib_dggglm + public :: stdlib_dgghd3 + public :: stdlib_dgghrd + public :: stdlib_dgglse + public :: stdlib_dggqrf + public :: stdlib_dggrqf + public :: stdlib_dgsvj0 + public :: stdlib_dgsvj1 + public :: stdlib_dgtcon + public :: stdlib_dgtrfs + public :: stdlib_dgtsv + public :: stdlib_dgtsvx + public :: stdlib_dgttrf + public :: stdlib_dgttrs + public :: stdlib_dgtts2 + public :: stdlib_dhgeqz + public :: stdlib_dhsein + public :: stdlib_dhseqr + public :: stdlib_disnan + public :: stdlib_dla_gbamv + public :: stdlib_dla_gbrcond + public :: stdlib_dla_gbrpvgrw + public :: stdlib_dla_geamv + public :: stdlib_dla_gercond + public :: stdlib_dla_gerpvgrw + public :: stdlib_dla_lin_berr + public :: stdlib_dla_porcond + public :: stdlib_dla_porpvgrw + public :: stdlib_dla_syamv + public :: stdlib_dla_syrcond + public :: stdlib_dla_syrpvgrw + public :: stdlib_dla_wwaddw + public :: stdlib_dlabad + public :: stdlib_dlabrd + public :: stdlib_dlacn2 + public :: stdlib_dlacon + public :: stdlib_dlacpy + public :: stdlib_dladiv + public :: stdlib_dladiv1 + public :: stdlib_dladiv2 + public :: stdlib_dlae2 + public :: stdlib_dlaebz + public :: stdlib_dlaed0 + public :: stdlib_dlaed1 + public :: stdlib_dlaed2 + public :: stdlib_dlaed3 + public :: stdlib_dlaed4 + public :: stdlib_dlaed5 + public :: stdlib_dlaed6 + public :: stdlib_dlaed7 + public :: stdlib_dlaed8 + public :: stdlib_dlaed9 + public :: stdlib_dlaeda + public :: stdlib_dlaein + public :: stdlib_dlaev2 + public :: stdlib_dlaexc + public :: stdlib_dlag2 + public :: stdlib_dlag2s + public :: stdlib_dlags2 + public :: stdlib_dlagtf + public :: stdlib_dlagtm + public :: stdlib_dlagts + public :: stdlib_dlagv2 + public :: stdlib_dlahqr + public :: stdlib_dlahr2 + public :: stdlib_dlaic1 + public :: stdlib_dlaisnan + public :: stdlib_dlaln2 + public :: stdlib_dlals0 + public :: stdlib_dlalsa + public :: stdlib_dlalsd + public :: stdlib_dlamch + public :: stdlib_dlamc3 + public :: stdlib_dlamrg + public :: stdlib_dlamswlq + public :: stdlib_dlamtsqr + public :: stdlib_dlaneg + public :: stdlib_dlangb + public :: stdlib_dlange + public :: stdlib_dlangt + public :: stdlib_dlanhs + public :: stdlib_dlansb + public :: stdlib_dlansf + public :: stdlib_dlansp + public :: stdlib_dlanst + public :: stdlib_dlansy + public :: stdlib_dlantb + public :: stdlib_dlantp + public :: stdlib_dlantr + public :: stdlib_dlanv2 + public :: stdlib_dlaorhr_col_getrfnp + public :: stdlib_dlaorhr_col_getrfnp2 + public :: stdlib_dlapll + public :: stdlib_dlapmr + public :: stdlib_dlapmt + public :: stdlib_dlapy2 + public :: stdlib_dlapy3 + public :: stdlib_dlaqgb + public :: stdlib_dlaqge + public :: stdlib_dlaqp2 + public :: stdlib_dlaqps + public :: stdlib_dlaqr0 + public :: stdlib_dlaqr1 + public :: stdlib_dlaqr2 + public :: stdlib_dlaqr3 + public :: stdlib_dlaqr4 + public :: stdlib_dlaqr5 + public :: stdlib_dlaqsb + public :: stdlib_dlaqsp + public :: stdlib_dlaqsy + public :: stdlib_dlaqtr + public :: stdlib_dlaqz0 + public :: stdlib_dlaqz1 + public :: stdlib_dlaqz2 + public :: stdlib_dlaqz3 + public :: stdlib_dlaqz4 + public :: stdlib_dlar1v + public :: stdlib_dlar2v + public :: stdlib_dlarf + public :: stdlib_dlarfb + public :: stdlib_dlarfb_gett + public :: stdlib_dlarfg + public :: stdlib_dlarfgp + public :: stdlib_dlarft + public :: stdlib_dlarfx + public :: stdlib_dlarfy + public :: stdlib_dlargv + public :: stdlib_dlarnv + public :: stdlib_dlarra + public :: stdlib_dlarrb + public :: stdlib_dlarrc + public :: stdlib_dlarrd + public :: stdlib_dlarre + public :: stdlib_dlarrf + public :: stdlib_dlarrj + public :: stdlib_dlarrk + public :: stdlib_dlarrr + public :: stdlib_dlarrv + public :: stdlib_dlartg + public :: stdlib_dlartgp + public :: stdlib_dlartgs + public :: stdlib_dlartv + public :: stdlib_dlaruv + public :: stdlib_dlarz + public :: stdlib_dlarzb + public :: stdlib_dlarzt + public :: stdlib_dlas2 + public :: stdlib_dlascl + public :: stdlib_dlasd0 + public :: stdlib_dlasd1 + public :: stdlib_dlasd2 + public :: stdlib_dlasd3 + public :: stdlib_dlasd4 + public :: stdlib_dlasd5 + public :: stdlib_dlasd6 + public :: stdlib_dlasd7 + public :: stdlib_dlasd8 + public :: stdlib_dlasda + public :: stdlib_dlasdq + public :: stdlib_dlasdt + public :: stdlib_dlaset + public :: stdlib_dlasq1 + public :: stdlib_dlasq2 + public :: stdlib_dlasq3 + public :: stdlib_dlasq4 + public :: stdlib_dlasq5 + public :: stdlib_dlasq6 + public :: stdlib_dlasr + public :: stdlib_dlasrt + public :: stdlib_dlassq + public :: stdlib_dlasv2 + public :: stdlib_dlaswlq + public :: stdlib_dlaswp + public :: stdlib_dlasy2 + public :: stdlib_dlasyf + public :: stdlib_dlasyf_aa + public :: stdlib_dlasyf_rk + public :: stdlib_dlasyf_rook + public :: stdlib_dlat2s + public :: stdlib_dlatbs + public :: stdlib_dlatdf + public :: stdlib_dlatps + public :: stdlib_dlatrd + public :: stdlib_dlatrs + public :: stdlib_dlatrz + public :: stdlib_dlatsqr + public :: stdlib_dlauu2 + public :: stdlib_dlauum + public :: stdlib_dopgtr + public :: stdlib_dopmtr + public :: stdlib_dorbdb + public :: stdlib_dorbdb1 + public :: stdlib_dorbdb2 + public :: stdlib_dorbdb3 + public :: stdlib_dorbdb4 + public :: stdlib_dorbdb5 + public :: stdlib_dorbdb6 + public :: stdlib_dorcsd + public :: stdlib_dorcsd2by1 + public :: stdlib_dorg2l + public :: stdlib_dorg2r + public :: stdlib_dorgbr + public :: stdlib_dorghr + public :: stdlib_dorgl2 + public :: stdlib_dorglq + public :: stdlib_dorgql + public :: stdlib_dorgqr + public :: stdlib_dorgr2 + public :: stdlib_dorgrq + public :: stdlib_dorgtr + public :: stdlib_dorgtsqr + public :: stdlib_dorgtsqr_row + public :: stdlib_dorhr_col + public :: stdlib_dorm22 + public :: stdlib_dorm2l + public :: stdlib_dorm2r + public :: stdlib_dormbr + public :: stdlib_dormhr + public :: stdlib_dorml2 + public :: stdlib_dormlq + public :: stdlib_dormql + public :: stdlib_dormqr + public :: stdlib_dormr2 + public :: stdlib_dormr3 + public :: stdlib_dormrq + public :: stdlib_dormrz + public :: stdlib_dormtr + public :: stdlib_dpbcon + public :: stdlib_dpbequ + public :: stdlib_dpbrfs + public :: stdlib_dpbstf + public :: stdlib_dpbsv + public :: stdlib_dpbsvx + public :: stdlib_dpbtf2 + public :: stdlib_dpbtrf + public :: stdlib_dpbtrs + public :: stdlib_dpftrf + public :: stdlib_dpftri + public :: stdlib_dpftrs + public :: stdlib_dpocon + public :: stdlib_dpoequ + public :: stdlib_dpoequb + public :: stdlib_dporfs + public :: stdlib_dposv + public :: stdlib_dposvx + public :: stdlib_dpotf2 + public :: stdlib_dpotrf + public :: stdlib_dpotrf2 + public :: stdlib_dpotri + public :: stdlib_dpotrs + public :: stdlib_dppcon + public :: stdlib_dppequ + public :: stdlib_dpprfs + public :: stdlib_dppsv + public :: stdlib_dppsvx + public :: stdlib_dpptrf + public :: stdlib_dpptri + public :: stdlib_dpptrs + public :: stdlib_dpstf2 + public :: stdlib_dpstrf + public :: stdlib_dptcon + public :: stdlib_dpteqr + public :: stdlib_dptrfs + public :: stdlib_dptsv + public :: stdlib_dptsvx + public :: stdlib_dpttrf + public :: stdlib_dpttrs + public :: stdlib_dptts2 + public :: stdlib_drscl + public :: stdlib_dsb2st_kernels + public :: stdlib_dsbev + public :: stdlib_dsbevd + public :: stdlib_dsbevx + public :: stdlib_dsbgst + public :: stdlib_dsbgv + public :: stdlib_dsbgvd + public :: stdlib_dsbgvx + public :: stdlib_dsbtrd + public :: stdlib_dsfrk + public :: stdlib_dsgesv + public :: stdlib_dspcon + public :: stdlib_dspev + public :: stdlib_dspevd + public :: stdlib_dspevx + public :: stdlib_dspgst + public :: stdlib_dspgv + public :: stdlib_dspgvd + public :: stdlib_dspgvx + public :: stdlib_dsposv + public :: stdlib_dsprfs + public :: stdlib_dspsv + public :: stdlib_dspsvx + public :: stdlib_dsptrd + public :: stdlib_dsptrf + public :: stdlib_dsptri + public :: stdlib_dsptrs + public :: stdlib_dstebz + public :: stdlib_dstedc + public :: stdlib_dstegr + public :: stdlib_dstein + public :: stdlib_dstemr + public :: stdlib_dsteqr + public :: stdlib_dsterf + public :: stdlib_dstev + public :: stdlib_dstevd + public :: stdlib_dstevr + public :: stdlib_dstevx + public :: stdlib_dsycon + public :: stdlib_dsycon_rook + public :: stdlib_dsyconv + public :: stdlib_dsyconvf + public :: stdlib_dsyconvf_rook + public :: stdlib_dsyequb + public :: stdlib_dsyev + public :: stdlib_dsyevd + public :: stdlib_dsyevr + public :: stdlib_dsyevx + public :: stdlib_dsygs2 + public :: stdlib_dsygst + public :: stdlib_dsygv + public :: stdlib_dsygvd + public :: stdlib_dsygvx + public :: stdlib_dsyrfs + public :: stdlib_dsysv + public :: stdlib_dsysv_aa + public :: stdlib_dsysv_rk + public :: stdlib_dsysv_rook + public :: stdlib_dsysvx + public :: stdlib_dsyswapr + public :: stdlib_dsytd2 + public :: stdlib_dsytf2 + public :: stdlib_dsytf2_rk + public :: stdlib_dsytf2_rook + public :: stdlib_dsytrd + public :: stdlib_dsytrd_sb2st + public :: stdlib_dsytrd_sy2sb + public :: stdlib_dsytrf + public :: stdlib_dsytrf_aa + public :: stdlib_dsytrf_rk + public :: stdlib_dsytrf_rook + public :: stdlib_dsytri + public :: stdlib_dsytri_rook + public :: stdlib_dsytrs + public :: stdlib_dsytrs2 + public :: stdlib_dsytrs_3 + public :: stdlib_dsytrs_aa + public :: stdlib_dsytrs_rook + public :: stdlib_dtbcon + public :: stdlib_dtbrfs + public :: stdlib_dtbtrs + public :: stdlib_dtfsm + public :: stdlib_dtftri + public :: stdlib_dtfttp + public :: stdlib_dtfttr + public :: stdlib_dtgevc + public :: stdlib_dtgex2 + public :: stdlib_dtgexc + public :: stdlib_dtgsen + public :: stdlib_dtgsja + public :: stdlib_dtgsna + public :: stdlib_dtgsy2 + public :: stdlib_dtgsyl + public :: stdlib_dtpcon + public :: stdlib_dtplqt + public :: stdlib_dtplqt2 + public :: stdlib_dtpmlqt + public :: stdlib_dtpmqrt + public :: stdlib_dtpqrt + public :: stdlib_dtpqrt2 + public :: stdlib_dtprfb + public :: stdlib_dtprfs + public :: stdlib_dtptri + public :: stdlib_dtptrs + public :: stdlib_dtpttf + public :: stdlib_dtpttr + public :: stdlib_dtrcon + public :: stdlib_dtrevc + public :: stdlib_dtrevc3 + public :: stdlib_dtrexc + public :: stdlib_dtrrfs + public :: stdlib_dtrsen + public :: stdlib_dtrsna + public :: stdlib_dtrsyl + public :: stdlib_dtrti2 + public :: stdlib_dtrtri + public :: stdlib_dtrtrs + public :: stdlib_dtrttf + public :: stdlib_dtrttp + public :: stdlib_dtzrzf + public :: stdlib_dzsum1 +#:if WITH_QP + public :: stdlib_dlag2q +#:endif + + ! 64-bit real constants + real(dp), parameter, private :: negone = -1.00_dp + real(dp), parameter, private :: zero = 0.00_dp + real(dp), parameter, private :: half = 0.50_dp + real(dp), parameter, private :: one = 1.00_dp + real(dp), parameter, private :: two = 2.00_dp + real(dp), parameter, private :: three = 3.00_dp + real(dp), parameter, private :: four = 4.00_dp + real(dp), parameter, private :: eight = 8.00_dp + real(dp), parameter, private :: ten = 10.00_dp + + ! 64-bit complex constants + complex(dp), parameter, private :: czero = ( 0.0_dp,0.0_dp) + complex(dp), parameter, private :: chalf = ( 0.5_dp,0.0_dp) + complex(dp), parameter, private :: cone = ( 1.0_dp,0.0_dp) + complex(dp), parameter, private :: cnegone = (-1.0_dp,0.0_dp) + + ! 64-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(dp), parameter, private :: rradix = real(radix(zero),dp) + real(dp), parameter, private :: ulp = epsilon(zero) + real(dp), parameter, private :: eps = ulp*half + real(dp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(dp), parameter, private :: safmax = one/safmin + real(dp), parameter, private :: smlnum = safmin/ulp + real(dp), parameter, private :: bignum = safmax*ulp + real(dp), parameter, private :: rtmin = sqrt(smlnum) + real(dp), parameter, private :: rtmax = sqrt(bignum) + + ! 64-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(dp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(dp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(dp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(dp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> DGBTF2: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_dgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_dscal( km, one / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_dger( km, ju-j, -one, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_dgbtf2 + + !> DGBTRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general band matrix A using the LU factorization computed + !> by DGBTRF. + + pure subroutine stdlib_dgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-one modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_dswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_dger( lm, nrhs, -one, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, 1 )& + , ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_dtbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else + ! solve a**t*x = b. + do i = 1, nrhs + ! solve u**t*x = b, overwriting b with x. + call stdlib_dtbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t*x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_dgemv( 'TRANSPOSE', lm, nrhs, -one, b( j+1, 1 ),ldb, ab( kd+1, j )& + , 1, one, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_dswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_dgbtrs + + !> DGEBAK: forms the right or left eigenvectors of a real general matrix + !> by backward transformation on the computed eigenvectors of the + !> balanced matrix output by DGEBAL. + + pure subroutine stdlib_dgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: scale(*) + real(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(dp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i DGGBAK: forms the right or left eigenvectors of a real generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> DGGBAL. + + pure subroutine stdlib_dggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: lscale(*), rscale(*) + real(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max,int + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv DGTSV: solves the equation + !> A*X = B, + !> where A is an n by n tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T*X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_dgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end do loop_10 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end if + if( d( n )==zero ) then + info = n + return + end if + else + loop_40: do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end do loop_40 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end if + if( d( n )==zero ) then + info = n + return + end if + end if + ! back solve with the matrix u from the factorization. + if( nrhs<=2 ) then + j = 1 + 70 continue + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + return + end subroutine stdlib_dgtsv + + !> DGTTRF: computes an LU factorization of a real tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_dgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: d(*), dl(*), du(*) + real(dp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( d( i )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_dgttrf + + !> DGTTS2: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by DGTTRF. + + pure subroutine stdlib_dgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ip, j + real(dp) :: temp + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + ip = ipiv( i ) + temp = b( i+1-ip+i, j ) - dl( i )*b( ip, j ) + b( i, j ) = b( ip, j ) + b( i+1, j ) = temp + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else + ! solve a**t * x = b. + if( nrhs<=1 ) then + ! solve u**t*x = b. + j = 1 + 70 continue + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t*x = b. + do i = n - 1, 1, -1 + ip = ipiv( i ) + temp = b( i, j ) - dl( i )*b( i+1, j ) + b( i, j ) = b( ip, j ) + b( ip, j ) = temp + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_dgtts2 + + !> DLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(dp) function stdlib_dla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( abs( ab( kd+i-j, j)), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( abs( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_dla_gbrpvgrw = rpvgrw + end function stdlib_dla_gbrpvgrw + + !> DLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(dp) function stdlib_dla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + real(dp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( abs( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( abs( af( i, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_dla_gerpvgrw = rpvgrw + end function stdlib_dla_gerpvgrw + + !> DLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_dla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: x(*), y(*) + real(dp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + real(dp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_dla_wwaddw + + !> DLABAD: takes as input the values computed by DLAMCH for underflow and + !> overflow, and returns the square root of each of these values if the + !> log of LARGE is sufficiently large. This subroutine is intended to + !> identify machines with a large exponent range, such as the Crays, and + !> redefine the underflow and overflow limits to be the square roots of + !> the values computed by DLAMCH. This subroutine is needed because + !> DLAMCH does not compensate for poor arithmetic in the upper half of + !> the exponent range, as is found on a Cray. + + pure subroutine stdlib_dlabad( small, large ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(inout) :: large, small + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: log10,sqrt + ! Executable Statements + ! if it looks like we're on a cray, take the square root of + ! small and large to avoid overflow and underflow problems. + if( log10( large )>2000._dp ) then + small = sqrt( small ) + large = sqrt( large ) + end if + return + end subroutine stdlib_dlabad + + !> DLACN2: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_dlacn2( n, v, x, isgn, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + integer(ilp), intent(inout) :: isave(3) + real(dp), intent(out) :: v(*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, jlast + real(dp) :: altsgn, estold, temp, xs + ! Intrinsic Functions + intrinsic :: abs,real,nint + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=dp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_dasum( n, x, 1 ) + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + isave( 2 ) = stdlib_idamax( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( isave( 2 ) ) = one + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_dcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_dasum( n, v, 1 ) + do i = 1, n + if( x(i)>=zero ) then + xs = one + else + xs = -one + end if + if( nint( xs,KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_idamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_dcopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_dlacn2 + + !> DLACON: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_dlacon( n, v, x, isgn, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + real(dp), intent(out) :: v(*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(dp) :: altsgn, estold, temp + ! Intrinsic Functions + intrinsic :: abs,real,nint,sign + ! Save Statement + save + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=dp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_dasum( n, x, 1 ) + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + j = stdlib_idamax( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( j ) = one + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_dcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_dasum( n, v, 1 ) + do i = 1, n + if( nint( sign( one, x( i ) ),KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = j + j = stdlib_idamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( j ) ) ) .and. ( iterest ) then + call stdlib_dcopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_dlacon + + !> DLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_dlacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_dlacpy + + + pure real(dp) function stdlib_dladiv2( a, b, c, d, r, t ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: a, b, c, d, r, t + ! ===================================================================== + + ! Local Scalars + real(dp) :: br + ! Executable Statements + if( r/=zero ) then + br = b * r + if( br/=zero ) then + stdlib_dladiv2 = (a + br) * t + else + stdlib_dladiv2 = a * t + (b * t) * r + end if + else + stdlib_dladiv2 = (a + d * (b / c)) * t + end if + return + end function stdlib_dladiv2 + + !> DLAE2: computes the eigenvalues of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, and RT2 + !> is the eigenvalue of smaller absolute value. + + pure subroutine stdlib_dlae2( a, b, c, rt1, rt2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: a, b, c + real(dp), intent(out) :: rt1, rt2 + ! ===================================================================== + + + + + ! Local Scalars + real(dp) :: ab, acmn, acmx, adf, df, rt, sm, tb + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + end if + return + end subroutine stdlib_dlae2 + + !> DLAEBZ: contains the iteration loops which compute and use the + !> function N(w), which is the count of eigenvalues of a symmetric + !> tridiagonal matrix T less than or equal to its argument w. It + !> performs a choice of two types of loops: + !> IJOB=1, followed by + !> IJOB=2: It takes as input a list of intervals and returns a list of + !> sufficiently small intervals whose union contains the same + !> eigenvalues as the union of the original intervals. + !> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. + !> The output interval (AB(j,1),AB(j,2)] will contain + !> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. + !> IJOB=3: It performs a binary search in each input interval + !> (AB(j,1),AB(j,2)] for a point w(j) such that + !> N(w(j))=NVAL(j), and uses C(j) as the starting point of + !> the search. If such a w(j) is found, then on output + !> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output + !> (AB(j,1),AB(j,2)] will be a small interval containing the + !> point where N(w) jumps through NVAL(j), unless that point + !> lies outside the initial interval. + !> Note that the intervals are in all cases half-open intervals, + !> i.e., of the form (a,b] , which includes b but not a . + !> To avoid underflow, the matrix should be scaled so that its largest + !> element is no greater than overflow**(1/2) * underflow**(1/4) + !> in absolute value. To assure the most accurate computation + !> of small eigenvalues, the matrix should be scaled to be + !> not much smaller than that, either. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966 + !> Note: the arguments are, in general, *not* checked for unreasonable + !> values. + + pure subroutine stdlib_dlaebz( ijob, nitmax, n, mmax, minp, nbmin, abstol,reltol, pivmin, d, & + e, e2, nval, ab, c, mout,nab, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, minp, mmax, n, nbmin, nitmax + integer(ilp), intent(out) :: info, mout + real(dp), intent(in) :: abstol, pivmin, reltol + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(inout) :: nab(mmax,*), nval(*) + real(dp), intent(inout) :: ab(mmax,*), c(*) + real(dp), intent(in) :: d(*), e(*), e2(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itmp1, itmp2, j, ji, jit, jp, kf, kfnew, kl, klnew + real(dp) :: tmp1, tmp2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! check for errors + info = 0 + if( ijob<1 .or. ijob>3 ) then + info = -1 + return + end if + ! initialize nab + if( ijob==1 ) then + ! compute the number of eigenvalues in the initial intervals. + mout = 0 + do ji = 1, minp + do jp = 1, 2 + tmp1 = d( 1 ) - ab( ji, jp ) + if( abs( tmp1 )=nbmin .and. nbmin>0 ) then + ! begin of parallel version of the loop + do ji = kf, kl + ! compute n(c), the number of eigenvalues less than c + work( ji ) = d( 1 ) - c( ji ) + iwork( ji ) = 0 + if( work( ji )<=pivmin ) then + iwork( ji ) = 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + do j = 2, n + work( ji ) = d( j ) - e2( j-1 ) / work( ji ) - c( ji ) + if( work( ji )<=pivmin ) then + iwork( ji ) = iwork( ji ) + 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + end do + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + klnew = kl + loop_70: do ji = kf, kl + ! insure that n(w) is monotone + iwork( ji ) = min( nab( ji, 2 ),max( nab( ji, 1 ), iwork( ji ) ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( iwork( ji )==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = c( ji ) + else if( iwork( ji )==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = c( ji ) + else + klnew = klnew + 1 + if( klnew<=mmax ) then + ! eigenvalue in both intervals -- add upper to + ! queue. + ab( klnew, 2 ) = ab( ji, 2 ) + nab( klnew, 2 ) = nab( ji, 2 ) + ab( klnew, 1 ) = c( ji ) + nab( klnew, 1 ) = iwork( ji ) + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + else + info = mmax + 1 + end if + end if + end do loop_70 + if( info/=0 )return + kl = klnew + else + ! ijob=3: binary search. keep only the interval containing + ! w s.t. n(w) = nval + do ji = kf, kl + if( iwork( ji )<=nval( ji ) ) then + ab( ji, 1 ) = c( ji ) + nab( ji, 1 ) = iwork( ji ) + end if + if( iwork( ji )>=nval( ji ) ) then + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + end if + end do + end if + else + ! end of parallel version of the loop + ! begin of serial version of the loop + klnew = kl + loop_100: do ji = kf, kl + ! compute n(w), the number of eigenvalues less than w + tmp1 = c( ji ) + tmp2 = d( 1 ) - tmp1 + itmp1 = 0 + if( tmp2<=pivmin ) then + itmp1 = 1 + tmp2 = min( tmp2, -pivmin ) + end if + do j = 2, n + tmp2 = d( j ) - e2( j-1 ) / tmp2 - tmp1 + if( tmp2<=pivmin ) then + itmp1 = itmp1 + 1 + tmp2 = min( tmp2, -pivmin ) + end if + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + ! insure that n(w) is monotone + itmp1 = min( nab( ji, 2 ),max( nab( ji, 1 ), itmp1 ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( itmp1==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = tmp1 + else if( itmp1==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = tmp1 + else if( klnew=nval( ji ) ) then + ab( ji, 2 ) = tmp1 + nab( ji, 2 ) = itmp1 + end if + end if + end do loop_100 + kl = klnew + end if + ! check for convergence + kfnew = kf + loop_110: do ji = kf, kl + tmp1 = abs( ab( ji, 2 )-ab( ji, 1 ) ) + tmp2 = max( abs( ab( ji, 2 ) ), abs( ab( ji, 1 ) ) ) + if( tmp1=nab( ji, 2 ) ) & + then + ! converged -- swap with position kfnew, + ! then increment kfnew + if( ji>kfnew ) then + tmp1 = ab( ji, 1 ) + tmp2 = ab( ji, 2 ) + itmp1 = nab( ji, 1 ) + itmp2 = nab( ji, 2 ) + ab( ji, 1 ) = ab( kfnew, 1 ) + ab( ji, 2 ) = ab( kfnew, 2 ) + nab( ji, 1 ) = nab( kfnew, 1 ) + nab( ji, 2 ) = nab( kfnew, 2 ) + ab( kfnew, 1 ) = tmp1 + ab( kfnew, 2 ) = tmp2 + nab( kfnew, 1 ) = itmp1 + nab( kfnew, 2 ) = itmp2 + if( ijob==3 ) then + itmp1 = nval( ji ) + nval( ji ) = nval( kfnew ) + nval( kfnew ) = itmp1 + end if + end if + kfnew = kfnew + 1 + end if + end do loop_110 + kf = kfnew + ! choose midpoints + do ji = kf, kl + c( ji ) = half*( ab( ji, 1 )+ab( ji, 2 ) ) + end do + ! if no more intervals to refine, quit. + if( kf>kl )go to 140 + end do loop_130 + ! converged + 140 continue + info = max( kl+1-kf, 0 ) + mout = kl + return + end subroutine stdlib_dlaebz + + !> This subroutine computes the I-th eigenvalue of a symmetric rank-one + !> modification of a 2-by-2 diagonal matrix + !> diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal elements in the array D are assumed to satisfy + !> D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_dlaed5( i, d, z, delta, rho, dlam ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(dp), intent(out) :: dlam + real(dp), intent(in) :: rho + ! Array Arguments + real(dp), intent(in) :: d(2), z(2) + real(dp), intent(out) :: delta(2) + ! ===================================================================== + + ! Local Scalars + real(dp) :: b, c, del, tau, temp, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + if( i==1 ) then + w = one + two*rho*( z( 2 )*z( 2 )-z( 1 )*z( 1 ) ) / del + if( w>zero ) then + b = del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*del + ! b > zero, always + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + dlam = d( 1 ) + tau + delta( 1 ) = -z( 1 ) / tau + delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + end if + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_dlaed5 + + !> DLAEDA: computes the Z vector corresponding to the merge step in the + !> CURLVLth step of the merge process with TLVLS steps for the CURPBMth + !> problem. + + pure subroutine stdlib_dlaeda( n, tlvls, curlvl, curpbm, prmptr, perm, givptr,givcol, givnum,& + q, qptr, z, ztemp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, n, tlvls + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + real(dp), intent(in) :: givnum(2,*), q(*) + real(dp), intent(out) :: z(*), ztemp(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bsiz1, bsiz2, curr, i, k, mid, psiz1, psiz2, ptr, zptr1 + ! Intrinsic Functions + intrinsic :: real,int,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAEDA', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine location of first number in second half. + mid = n / 2 + 1 + ! gather last/first rows of appropriate eigenblocks into center of z + ptr = 1 + ! determine location of lowest level subproblem in the full storage + ! scheme + curr = ptr + curpbm*2**curlvl + 2**( curlvl-1 ) - 1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these square + ! roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=dp) ),KIND=ilp) + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=dp) ),KIND=ilp) + + do k = 1, mid - bsiz1 - 1 + z( k ) = zero + end do + call stdlib_dcopy( bsiz1, q( qptr( curr )+bsiz1-1 ), bsiz1,z( mid-bsiz1 ), 1 ) + call stdlib_dcopy( bsiz2, q( qptr( curr+1 ) ), bsiz2, z( mid ), 1 ) + do k = mid + bsiz2, n + z( k ) = zero + end do + ! loop through remaining levels 1 -> curlvl applying the givens + ! rotations and permutation and then multiplying the center matrices + ! against the current z. + ptr = 2**tlvls + 1 + loop_70: do k = 1, curlvl - 1 + curr = ptr + curpbm*2**( curlvl-k ) + 2**( curlvl-k-1 ) - 1 + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + zptr1 = mid - psiz1 + ! apply givens at curr and curr+1 + do i = givptr( curr ), givptr( curr+1 ) - 1 + call stdlib_drot( 1, z( zptr1+givcol( 1, i )-1 ), 1,z( zptr1+givcol( 2, i )-1 ), & + 1, givnum( 1, i ),givnum( 2, i ) ) + end do + do i = givptr( curr+1 ), givptr( curr+2 ) - 1 + call stdlib_drot( 1, z( mid-1+givcol( 1, i ) ), 1,z( mid-1+givcol( 2, i ) ), 1, & + givnum( 1, i ),givnum( 2, i ) ) + end do + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + do i = 0, psiz1 - 1 + ztemp( i+1 ) = z( zptr1+perm( prmptr( curr )+i )-1 ) + end do + do i = 0, psiz2 - 1 + ztemp( psiz1+i+1 ) = z( mid+perm( prmptr( curr+1 )+i )-1 ) + end do + ! multiply blocks at curr and curr+1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these + ! square roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=dp) ),KIND=ilp) + + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=dp) ),KIND=ilp) + + if( bsiz1>0 ) then + call stdlib_dgemv( 'T', bsiz1, bsiz1, one, q( qptr( curr ) ),bsiz1, ztemp( 1 ), & + 1, zero, z( zptr1 ), 1 ) + end if + call stdlib_dcopy( psiz1-bsiz1, ztemp( bsiz1+1 ), 1, z( zptr1+bsiz1 ),1 ) + if( bsiz2>0 ) then + call stdlib_dgemv( 'T', bsiz2, bsiz2, one, q( qptr( curr+1 ) ),bsiz2, ztemp( & + psiz1+1 ), 1, zero, z( mid ), 1 ) + end if + call stdlib_dcopy( psiz2-bsiz2, ztemp( psiz1+bsiz2+1 ), 1,z( mid+bsiz2 ), 1 ) + + ptr = ptr + 2**( tlvls-k ) + end do loop_70 + return + end subroutine stdlib_dlaeda + + !> DLAEV2: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_dlaev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: a, b, c + real(dp), intent(out) :: cs1, rt1, rt2, sn1 + ! ===================================================================== + + + + + ! Local Scalars + integer(ilp) :: sgn1, sgn2 + real(dp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + sgn1 = 1 + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + sgn1 = 1 + end if + ! compute the eigenvector + if( df>=zero ) then + cs = df + rt + sgn2 = 1 + else + cs = df - rt + sgn2 = -1 + end if + acs = abs( cs ) + if( acs>ab ) then + ct = -tb / cs + sn1 = one / sqrt( one+ct*ct ) + cs1 = ct*sn1 + else + if( ab==zero ) then + cs1 = one + sn1 = zero + else + tn = -cs / tb + cs1 = one / sqrt( one+tn*tn ) + sn1 = tn*cs1 + end if + end if + if( sgn1==sgn2 ) then + tn = cs1 + cs1 = -sn1 + sn1 = tn + end if + return + end subroutine stdlib_dlaev2 + + !> DLAG2: computes the eigenvalues of a 2 x 2 generalized eigenvalue + !> problem A - w B, with scaling as necessary to avoid over-/underflow. + !> The scaling factor "s" results in a modified eigenvalue equation + !> s A - w B + !> where s is a non-negative scaling factor chosen so that w, w B, + !> and s A do not overflow and, if possible, do not underflow, either. + + pure subroutine stdlib_dlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(dp), intent(in) :: safmin + real(dp), intent(out) :: scale1, scale2, wi, wr1, wr2 + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: fuzzy1 = one+1.0e-5_dp + + + + ! Local Scalars + real(dp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, & + binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,& + rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, & + wsmall + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + rtmin = sqrt( safmin ) + rtmax = one / rtmin + safmax = one / safmin + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a11 = ascale*a( 1, 1 ) + a21 = ascale*a( 2, 1 ) + a12 = ascale*a( 1, 2 ) + a22 = ascale*a( 2, 2 ) + ! perturb b if necessary to insure non-singularity + b11 = b( 1, 1 ) + b12 = b( 1, 2 ) + b22 = b( 2, 2 ) + bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin ) + if( abs( b11 )=one ) then + discr = ( rtmin*pp )**2 + qq*safmin + r = sqrt( abs( discr ) )*rtmax + else + if( pp**2+abs( qq )<=safmin ) then + discr = ( rtmax*pp )**2 + qq*safmax + r = sqrt( abs( discr ) )*rtmin + else + discr = pp**2 + qq + r = sqrt( abs( discr ) ) + end if + end if + ! note: the test of r in the following if is to cover the case when + ! discr is small and negative and is flushed to zero during + ! the calculation of r. on machines which have a consistent + ! flush-to-zero threshold and handle numbers above that + ! threshold correctly, it would not be necessary. + if( discr>=zero .or. r==zero ) then + sum = pp + sign( r, pp ) + diff = pp - sign( r, pp ) + wbig = shift + sum + ! compute smaller eigenvalue + wsmall = shift + diff + if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then + wdet = ( a11*a22-a12*a21 )*( binv11*binv22 ) + wsmall = wdet / wbig + end if + ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1) + ! for wr1. + if( pp>abi22 ) then + wr1 = min( wbig, wsmall ) + wr2 = max( wbig, wsmall ) + else + wr1 = max( wbig, wsmall ) + wr2 = min( wbig, wsmall ) + end if + wi = zero + else + ! complex eigenvalues + wr1 = shift + pp + wr2 = wr1 + wi = r + end if + ! further scaling to avoid underflow and overflow in computing + ! scale1 and overflow in computing w*b. + ! this scale factor (wscale) is bounded from above using c1 and c2, + ! and from below using c3 and c4. + ! c1 implements the condition s a must never overflow. + ! c2 implements the condition w b must never overflow. + ! c3, with c2, + ! implement the condition that s a - w b must never overflow. + ! c4 implements the condition s should not underflow. + ! c5 implements the condition max(s,|w|) should be at least 2. + c1 = bsize*( safmin*max( one, ascale ) ) + c2 = safmin*max( one, bnorm ) + c3 = bsize*safmin + if( ascale<=one .and. bsize<=one ) then + c4 = min( one, ( ascale / safmin )*bsize ) + else + c4 = one + end if + if( ascale<=one .or. bsize<=one ) then + c5 = min( one, ascale*bsize ) + else + c5 = one + end if + ! scale first eigenvalue + wabs = abs( wr1 ) + abs( wi ) + wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) ) + + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr1 = wr1*wscale + if( wi/=zero ) then + wi = wi*wscale + wr2 = wr1 + scale2 = scale1 + end if + else + scale1 = ascale*bsize + scale2 = scale1 + end if + ! scale second eigenvalue (if real) + if( wi==zero ) then + wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), & + c5 ) ) ) + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr2 = wr2*wscale + else + scale2 = ascale*bsize + end if + end if + return + end subroutine stdlib_dlag2 + + !> DLAG2S: converts a DOUBLE PRECISION matrix, SA, to a SINGLE + !> PRECISION matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> DLAG2S checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_dlag2s( m, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + real(sp), intent(out) :: sa(ldsa,*) + real(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: rmax + ! Executable Statements + rmax = stdlib_slamch( 'O' ) + do j = 1, n + do i = 1, m + if( ( a( i, j )<-rmax ) .or. ( a( i, j )>rmax ) ) then + info = 1 + go to 30 + end if + sa( i, j ) = a( i, j ) + end do + end do + info = 0 + 30 continue + return + end subroutine stdlib_dlag2s + + !> DLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_dlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(in) :: alpha, beta + ! Array Arguments + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b + a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b - a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_dlagtm + + !> This routine is not for general use. It exists solely to avoid + !> over-optimization in DISNAN. + !> DLAISNAN: checks for NaNs by comparing its two arguments for + !> inequality. NaN is the only floating-point value where NaN != NaN + !> returns .TRUE. To check for NaNs, pass the same variable as both + !> arguments. + !> A compiler must assume that the two arguments are + !> not the same variable, and the test will not be optimized away. + !> Interprocedural or whole-program optimization may delete this + !> test. The ISNAN functions will be replaced by the correct + !> Fortran 03 intrinsic once the intrinsic is widely available. + + pure logical(lk) function stdlib_dlaisnan( din1, din2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: din1, din2 + ! ===================================================================== + ! Executable Statements + stdlib_dlaisnan = (din1/=din2) + return + end function stdlib_dlaisnan + + !> DLAMCH: determines double precision machine parameters. + + pure real(dp) function stdlib_dlamch( cmach ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: cmach + ! ===================================================================== + + ! Local Scalars + real(dp) :: rnd, eps, sfmin, small, rmach + ! Intrinsic Functions + intrinsic :: digits,epsilon,huge,maxexponent,minexponent,radix,tiny + ! Executable Statements + ! assume rounding, not chopping. always. + rnd = one + if( one==rnd ) then + eps = epsilon(zero) * 0.5 + else + eps = epsilon(zero) + end if + if( stdlib_lsame( cmach, 'E' ) ) then + rmach = eps + else if( stdlib_lsame( cmach, 'S' ) ) then + sfmin = tiny(zero) + small = one / huge(zero) + if( small>=sfmin ) then + ! use small plus a bit, to avoid the possibility of rounding + ! causing overflow when computing 1/sfmin. + sfmin = small*( one+eps ) + end if + rmach = sfmin + else if( stdlib_lsame( cmach, 'B' ) ) then + rmach = radix(zero) + else if( stdlib_lsame( cmach, 'P' ) ) then + rmach = eps * radix(zero) + else if( stdlib_lsame( cmach, 'N' ) ) then + rmach = digits(zero) + else if( stdlib_lsame( cmach, 'R' ) ) then + rmach = rnd + else if( stdlib_lsame( cmach, 'M' ) ) then + rmach = minexponent(zero) + else if( stdlib_lsame( cmach, 'U' ) ) then + rmach = tiny(zero) + else if( stdlib_lsame( cmach, 'L' ) ) then + rmach = maxexponent(zero) + else if( stdlib_lsame( cmach, 'O' ) ) then + rmach = huge(zero) + else + rmach = zero + end if + stdlib_dlamch = rmach + return + end function stdlib_dlamch + + + pure real(dp) function stdlib_dlamc3( a, b ) + ! -- lapack auxiliary routine -- + ! univ. of tennessee, univ. of california berkeley and nag ltd.. + ! Scalar Arguments + real(dp), intent(in) :: a, b + ! ===================================================================== + ! Executable Statements + stdlib_dlamc3 = a + b + return + end function stdlib_dlamc3 + + !> DLAMRG: will create a permutation list which will merge the elements + !> of A (which is composed of two independently sorted sets) into a + !> single set which is sorted in ascending order. + + pure subroutine stdlib_dlamrg( n1, n2, a, dtrd1, dtrd2, index ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dtrd1, dtrd2, n1, n2 + ! Array Arguments + integer(ilp), intent(out) :: index(*) + real(dp), intent(in) :: a(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ind1, ind2, n1sv, n2sv + ! Executable Statements + n1sv = n1 + n2sv = n2 + if( dtrd1>0 ) then + ind1 = 1 + else + ind1 = n1 + end if + if( dtrd2>0 ) then + ind2 = 1 + n1 + else + ind2 = n1 + n2 + end if + i = 1 + ! while ( (n1sv > 0) + 10 continue + if( n1sv>0 .and. n2sv>0 ) then + if( a( ind1 )<=a( ind2 ) ) then + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + dtrd1 + n1sv = n1sv - 1 + else + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + dtrd2 + n2sv = n2sv - 1 + end if + go to 10 + end if + ! end while + if( n1sv==0 ) then + do n1sv = 1, n2sv + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + dtrd2 + end do + else + ! n2sv == 0 + do n2sv = 1, n1sv + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + dtrd1 + end do + end if + return + end subroutine stdlib_dlamrg + + !> DLAORHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine DLAORHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2 + !> is self-sufficient and can be used without DLAORHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_dlaorhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: abs,sign,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_dscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_dlaorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_dtrsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_dtrsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_dlaorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_dlaorhr_col_getrfnp2 + + !> DLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_dlapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + real(dp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_dlapmr + + !> DLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_dlapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, in, j + real(dp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_dlapmt + + !> DLAPY3: returns sqrt(x**2+y**2+z**2), taking care not to cause + !> unnecessary overflow and unnecessary underflow. + + pure real(dp) function stdlib_dlapy3( x, y, z ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: x, y, z + ! ===================================================================== + + ! Local Scalars + real(dp) :: w, xabs, yabs, zabs, hugeval + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + hugeval = stdlib_dlamch( 'OVERFLOW' ) + xabs = abs( x ) + yabs = abs( y ) + zabs = abs( z ) + w = max( xabs, yabs, zabs ) + if( w==zero .or. w>hugeval ) then + ! w can be zero for max(0,nan,0) + ! adding all three entries together will make sure + ! nan will not disappear. + stdlib_dlapy3 = xabs + yabs + zabs + else + stdlib_dlapy3 = w*sqrt( ( xabs / w )**2+( yabs / w )**2+( zabs / w )**2 ) + end if + return + end function stdlib_dlapy3 + + !> DLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_dlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_dlaqgb + + !> DLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_dlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_dlaqge + + !> Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + !> scaling to avoid overflows and most underflows. It + !> is assumed that either + !> 1) sr1 = sr2 and si1 = -si2 + !> or + !> 2) si1 = si2 = 0. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_dlaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: si1, si2, sr1, sr2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + real(dp), intent(in) :: h(ldh,*) + real(dp), intent(out) :: v(*) + ! ================================================================ + + ! Local Scalars + real(dp) :: h21s, h31s, s + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) - si1*( & + si2 / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) + end if + else + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) +abs( h( 3, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) -si1*( si2 / s ) + h( 1, 2 )& + *h21s + h( 1, 3 )*h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) +h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-sr1-sr2 ) +h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_dlaqr1 + + !> DLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_dlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_dlaqsb + + !> DLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_dlaqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(inout) :: ap(*) + real(dp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_dlaqsp + + !> DLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_dlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_dlaqsy + + !> DLAR2V: applies a vector of real plane rotations from both sides to + !> a sequence of 2-by-2 real symmetric matrices, defined by the elements + !> of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) + !> ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_dlar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(dp), intent(in) :: c(*), s(*) + real(dp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(dp) :: ci, si, t1, t2, t3, t4, t5, t6, xi, yi, zi + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( ix ) + zi = z( ix ) + ci = c( ic ) + si = s( ic ) + t1 = si*zi + t2 = ci*zi + t3 = t2 - si*xi + t4 = t2 + si*yi + t5 = ci*xi + t1 + t6 = ci*yi - t1 + x( ix ) = ci*t5 + si*t4 + y( ix ) = ci*t6 - si*t3 + z( ix ) = ci*t4 - si*t5 + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_dlar2v + + !> DLARF: applies a real elementary reflector H to a real m by n matrix + !> C, from either the left or the right. H is represented in the form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + + pure subroutine stdlib_dlarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + real(dp), intent(in) :: tau + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: v(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=zero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-zero row in v. + do while( lastv>0 .and. v( i )==zero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-zero column in c(1:lastv,:). + lastc = stdlib_iladlc(lastv, n, c, ldc) + else + ! scan for the last non-zero row in c(:,1:lastv). + lastc = stdlib_iladlr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_dp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**t * v(1:lastv,1) + call stdlib_dgemv( 'TRANSPOSE', lastv, lastc, one, c, ldc, v, incv,zero, work, 1 & + ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**t + call stdlib_dger( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_dgemv( 'NO TRANSPOSE', lastc, lastv, one, c, ldc,v, incv, zero, work,& + 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**t + call stdlib_dger( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_dlarf + + !> DLARFB: applies a real block reflector H or its transpose H**T to a + !> real m by n matrix C, from either the left or the right. + + pure subroutine stdlib_dlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: t(ldt,*), v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_dcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2 + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c( k+1, 1 ),& + ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_dtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c2 := c2 - v2 * w**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v( k+1, 1 )& + , ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_dcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c2 := c2 - w * v2**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( k+1, 1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_dcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( m-k+& + 1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1 + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_dtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c1 := c1 - v1 * w**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v, ldv, & + work, ldwork, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v( m-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_dcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( n-k+& + 1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c, ldc, & + v, ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c1 := c1 - w * v1**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v( n-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_dcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2**t + call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c( k+1, 1 ), & + ldc, v( 1, k+1 ), ldv, one,work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_dtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c2 := c2 - v2**t * w**t + call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v( 1, k+1 ), & + ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_dcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c( 1, k+1 ),& + ldc, v( 1, k+1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_dtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( 1, k+1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_dcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v( 1, m-k+& + 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1**t + call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c, ldc, v, ldv,& + one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_dtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c1 := c1 - v1**t * w**t + call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v, ldv, work, & + ldwork, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h' where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_dcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v( 1, n-k+& + 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_dlarfb + + !> DLARFB_GETT: applies a real Householder block reflector H from the + !> left to a real (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_dlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_dcopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_dtrmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_dgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1, k+1 ), ldb, one, work, & + ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_dtrmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_dgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1, k+& + 1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_dtrmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_dcopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = zero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_dtrmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_dtrmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_dtrmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_dtrmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_dlarfb_gett + + !> DLARFT: forms the triangular factor T of a real block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + + pure subroutine stdlib_dlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(dp), intent(out) :: t(ldt,*) + real(dp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( i, prevlastv ) + if( tau( i )==zero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = zero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( i , j ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**t * v(i:j,i) + call stdlib_dgemv( 'TRANSPOSE', j-i, i-1, -tau( i ),v( i+1, 1 ), ldv, v( i+& + 1, i ), 1, one,t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**t + call stdlib_dgemv( 'NO TRANSPOSE', i-1, j-i, -tau( i ),v( 1, i+1 ), ldv, v(& + i, i+1 ), ldv, one,t( 1, i ), 1 ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_dtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_dlarft + + !> DLARFX: applies a real elementary reflector H to a real m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_dlarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + real(dp), intent(in) :: tau + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: v(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + real(dp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, v6, & + v7, v8, v9 + ! Executable Statements + if( tau==zero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_dlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_dlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 continue + return + end subroutine stdlib_dlarfx + + !> DLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n symmetric matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_dlarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + real(dp), intent(in) :: tau + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: v(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: alpha + ! Executable Statements + if( tau==zero )return + ! form w:= c * v + call stdlib_dsymv( uplo, n, one, c, ldc, v, incv, zero, work, 1 ) + alpha = -half*tau*stdlib_ddot( n, work, 1, v, incv ) + call stdlib_daxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_dsyr2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_dlarfy + + !> DLARGV: generates a vector of real plane rotations, determined by + !> elements of the real vectors x and y. For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( a(i) ) + !> ( -s(i) c(i) ) ( y(i) ) = ( 0 ) + + pure subroutine stdlib_dlargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(dp), intent(out) :: c(*) + real(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(dp) :: f, g, t, tt + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + loop_10: do i = 1, n + f = x( ix ) + g = y( iy ) + if( g==zero ) then + c( ic ) = one + else if( f==zero ) then + c( ic ) = zero + y( iy ) = one + x( ix ) = g + else if( abs( f )>abs( g ) ) then + t = g / f + tt = sqrt( one+t*t ) + c( ic ) = one / tt + y( iy ) = t*c( ic ) + x( ix ) = f*tt + else + t = f / g + tt = sqrt( one+t*t ) + y( iy ) = one / tt + c( ic ) = t*y( iy ) + x( ix ) = g*tt + end if + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_10 + return + end subroutine stdlib_dlargv + + !> Compute the splitting points with threshold SPLTOL. + !> DLARRA: sets any "small" off-diagonal elements to zero. + + pure subroutine stdlib_dlarra( n, d, e, e2, spltol, tnrm,nsplit, isplit, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, nsplit + integer(ilp), intent(in) :: n + real(dp), intent(in) :: spltol, tnrm + ! Array Arguments + integer(ilp), intent(out) :: isplit(*) + real(dp), intent(in) :: d(*) + real(dp), intent(inout) :: e(*), e2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: eabs, tmp1 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! compute splitting points + nsplit = 1 + if(spltol Find the number of eigenvalues of the symmetric tridiagonal matrix T + !> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T + !> if JOBT = 'L'. + + pure subroutine stdlib_dlarrc( jobt, n, vl, vu, d, e, pivmin,eigcnt, lcnt, rcnt, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobt + integer(ilp), intent(out) :: eigcnt, info, lcnt, rcnt + integer(ilp), intent(in) :: n + real(dp), intent(in) :: pivmin, vl, vu + ! Array Arguments + real(dp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: matt + real(dp) :: lpivot, rpivot, sl, su, tmp, tmp2 + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + lcnt = 0 + rcnt = 0 + eigcnt = 0 + matt = stdlib_lsame( jobt, 'T' ) + if (matt) then + ! sturm sequence count on t + lpivot = d( 1 ) - vl + rpivot = d( 1 ) - vu + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + do i = 1, n-1 + tmp = e(i)**2 + lpivot = ( d( i+1 )-vl ) - tmp/lpivot + rpivot = ( d( i+1 )-vu ) - tmp/rpivot + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + end do + else + ! sturm sequence count on l d l^t + sl = -vl + su = -vu + do i = 1, n - 1 + lpivot = d( i ) + sl + rpivot = d( i ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + tmp = e(i) * d(i) * e(i) + tmp2 = tmp / lpivot + if( tmp2==zero ) then + sl = tmp - vl + else + sl = sl*tmp2 - vl + end if + tmp2 = tmp / rpivot + if( tmp2==zero ) then + su = tmp - vu + else + su = su*tmp2 - vu + end if + end do + lpivot = d( n ) + sl + rpivot = d( n ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + endif + eigcnt = rcnt - lcnt + return + end subroutine stdlib_dlarrc + + !> DLARRD: computes the eigenvalues of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_dlarrd( range, order, n, vl, vu, il, iu, gers,reltol, d, e, e2, & + pivmin, nsplit, isplit,m, w, werr, wl, wu, iblock, indexw,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: order, range + integer(ilp), intent(in) :: il, iu, n, nsplit + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: pivmin, reltol, vl, vu + real(dp), intent(out) :: wl, wu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), indexw(*), iwork(*) + integer(ilp), intent(in) :: isplit(*) + real(dp), intent(in) :: d(*), e(*), e2(*), gers(*) + real(dp), intent(out) :: w(*), werr(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: fudge = two + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: valrng = 2 + integer(ilp), parameter :: indrng = 3 + + + ! Local Scalars + logical(lk) :: ncnvrg, toofew + integer(ilp) :: i, ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iout, & + irange, itmax, itmp1, itmp2, iw, iwoff, j, jblk, jdisc, je, jee, nb, nwl, nwu + real(dp) :: atoli, eps, gl, gu, rtoli, tmp1, tmp2, tnorm, uflow, wkill, wlu, & + wul + ! Local Arrays + integer(ilp) :: idumma(1) + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + else + irange = 0 + end if + ! check for errors + if( irange<=0 ) then + info = -1 + else if( .not.(stdlib_lsame(order,'B').or.stdlib_lsame(order,'E')) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( irange==valrng ) then + if( vl>=vu )info = -5 + else if( irange==indrng .and.( il<1 .or. il>max( 1, n ) ) ) then + info = -6 + else if( irange==indrng .and.( iun ) ) then + info = -7 + end if + if( info/=0 ) then + return + end if + ! initialize error flags + info = 0 + ncnvrg = .false. + toofew = .false. + ! quick return if possible + m = 0 + if( n==0 ) return + ! simplification: + if( irange==indrng .and. il==1 .and. iu==n ) irange = 1 + ! get machine constants + eps = stdlib_dlamch( 'P' ) + uflow = stdlib_dlamch( 'U' ) + ! special case when n=1 + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + endif + return + end if + ! nb is the minimum vector length for vector bisection, or 0 + ! if only scalar is to be done. + nb = stdlib_ilaenv( 1, 'DSTEBZ', ' ', n, -1, -1, -1 ) + if( nb<=1 ) nb = 0 + ! find global spectral radius + gl = d(1) + gu = d(1) + do i = 1,n + gl = min( gl, gers( 2*i - 1)) + gu = max( gu, gers(2*i) ) + end do + ! compute global gerschgorin bounds and spectral diameter + tnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*tnorm*eps*n - fudge*two*pivmin + gu = gu + fudge*tnorm*eps*n + fudge*two*pivmin + ! [jan/28/2009] remove the line below since spdiam variable not use + ! spdiam = gu - gl + ! input arguments for stdlib_dlaebz: + ! the relative tolerance. an interval (a,b] lies within + ! "relative tolerance" if b-a < reltol*max(|a|,|b|), + rtoli = reltol + ! set the absolute tolerance for interval convergence to zero to force + ! interval convergence based on relative size of the interval. + ! this is dangerous because intervals might not converge when reltol is + ! small. but at least a very small number should be selected so that for + ! strongly graded matrices, the code can get relatively accurate + ! eigenvalues. + atoli = fudge*two*uflow + fudge*two*pivmin + if( irange==indrng ) then + ! range='i': compute an interval containing eigenvalues + ! il through iu. the initial interval [gl,gu] from the global + ! gerschgorin bounds gl and gu is refined by stdlib_dlaebz. + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + work( n+1 ) = gl + work( n+2 ) = gl + work( n+3 ) = gu + work( n+4 ) = gu + work( n+5 ) = gl + work( n+6 ) = gu + iwork( 1 ) = -1 + iwork( 2 ) = -1 + iwork( 3 ) = n + 1 + iwork( 4 ) = n + 1 + iwork( 5 ) = il - 1 + iwork( 6 ) = iu + call stdlib_dlaebz( 3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin,d, e, e2, iwork( 5 )& + , work( n+1 ), work( n+5 ), iout,iwork, w, iblock, iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! on exit, output intervals may not be ordered by ascending negcount + if( iwork( 6 )==iu ) then + wl = work( n+1 ) + wlu = work( n+3 ) + nwl = iwork( 1 ) + wu = work( n+4 ) + wul = work( n+2 ) + nwu = iwork( 4 ) + else + wl = work( n+2 ) + wlu = work( n+4 ) + nwl = iwork( 2 ) + wu = work( n+3 ) + wul = work( n+1 ) + nwu = iwork( 3 ) + end if + ! on exit, the interval [wl, wlu] contains a value with negcount nwl, + ! and [wul, wu] contains a value with negcount nwu. + if( nwl<0 .or. nwl>=n .or. nwu<1 .or. nwu>n ) then + info = 4 + return + end if + elseif( irange==valrng ) then + wl = vl + wu = vu + elseif( irange==allrng ) then + wl = gl + wu = gu + endif + ! find eigenvalues -- loop over blocks and recompute nwl and nwu. + ! nwl accumulates the number of eigenvalues .le. wl, + ! nwu accumulates the number of eigenvalues .le. wu + m = 0 + iend = 0 + info = 0 + nwl = 0 + nwu = 0 + loop_70: do jblk = 1, nsplit + ioff = iend + ibegin = ioff + 1 + iend = isplit( jblk ) + in = iend - ioff + if( in==1 ) then + ! 1x1 block + if( wl>=d( ibegin )-pivmin )nwl = nwl + 1 + if( wu>=d( ibegin )-pivmin )nwu = nwu + 1 + if( irange==allrng .or.( wl= d( ibegin )-pivmin ) ) & + then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + iblock( m ) = jblk + indexw( m ) = 1 + end if + ! disabled 2x2 case because of a failure on the following matrix + ! range = 'i', il = iu = 4 + ! original tridiagonal, d = [ + ! -0.150102010615740e+00_dp + ! -0.849897989384260e+00_dp + ! -0.128208148052635e-15_dp + ! 0.128257718286320e-15_dp + ! ]; + ! e = [ + ! -0.357171383266986e+00_dp + ! -0.180411241501588e-15_dp + ! -0.175152352710251e-15_dp + ! ]; + ! else if( in==2 ) then + ! * 2x2 block + ! disc = sqrt( (half*(d(ibegin)-d(iend)))**2 + e(ibegin)**2 ) + ! tmp1 = half*(d(ibegin)+d(iend)) + ! l1 = tmp1 - disc + ! if( wl>= l1-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l1-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l1-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l1 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 1 + ! endif + ! l2 = tmp1 + disc + ! if( wl>= l2-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l2-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l2-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l2 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 2 + ! endif + else + ! general case - block of size in >= 2 + ! compute local gerschgorin interval and use it as the initial + ! interval for stdlib_dlaebz + gu = d( ibegin ) + gl = d( ibegin ) + tmp1 = zero + do j = ibegin, iend + gl = min( gl, gers( 2*j - 1)) + gu = max( gu, gers(2*j) ) + end do + ! [jan/28/2009] + ! change spdiam by tnorm in lines 2 and 3 thereafter + ! line 1: remove computation of spdiam (not useful anymore) + ! spdiam = gu - gl + ! gl = gl - fudge*spdiam*eps*in - fudge*pivmin + ! gu = gu + fudge*spdiam*eps*in + fudge*pivmin + gl = gl - fudge*tnorm*eps*in - fudge*pivmin + gu = gu + fudge*tnorm*eps*in + fudge*pivmin + if( irange>1 ) then + if( gu=gu )cycle loop_70 + end if + ! find negcount of initial interval boundaries gl and gu + work( n+1 ) = gl + work( n+in+1 ) = gu + call stdlib_dlaebz( 1, 0, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e( & + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), im,iwork, w( m+1 ),& + iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + nwl = nwl + iwork( 1 ) + nwu = nwu + iwork( in+1 ) + iwoff = m - iwork( 1 ) + ! compute eigenvalues + itmax = int( ( log( gu-gl+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + & + 2 + call stdlib_dlaebz( 2, itmax, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e(& + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), iout,iwork, w( m+& + 1 ), iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! copy eigenvalues into w and iblock + ! use -jblk for block number for unconverged eigenvalues. + ! loop over the number of output intervals from stdlib_dlaebz + do j = 1, iout + ! eigenvalue approximation is middle point of interval + tmp1 = half*( work( j+n )+work( j+in+n ) ) + ! semi length of error interval + tmp2 = half*abs( work( j+n )-work( j+in+n ) ) + if( j>iout-iinfo ) then + ! flag non-convergence. + ncnvrg = .true. + ib = -jblk + else + ib = jblk + end if + do je = iwork( j ) + 1 + iwoff,iwork( j+in ) + iwoff + w( je ) = tmp1 + werr( je ) = tmp2 + indexw( je ) = je - iwoff + iblock( je ) = ib + end do + end do + m = m + im + end if + end do loop_70 + ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu + ! if nwl+1 < il or nwu > iu, discard extra eigenvalues. + if( irange==indrng ) then + idiscl = il - 1 - nwl + idiscu = nwu - iu + if( idiscl>0 ) then + im = 0 + do je = 1, m + ! remove some of the smallest eigenvalues from the left so that + ! at the end idiscl =0. move all eigenvalues up to the left. + if( w( je )<=wlu .and. idiscl>0 ) then + idiscl = idiscl - 1 + else + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscu>0 ) then + ! remove some of the largest eigenvalues from the right so that + ! at the end idiscu =0. move all eigenvalues up to the left. + im=m+1 + do je = m, 1, -1 + if( w( je )>=wul .and. idiscu>0 ) then + idiscu = idiscu - 1 + else + im = im - 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + jee = 0 + do je = im, m + jee = jee + 1 + w( jee ) = w( je ) + werr( jee ) = werr( je ) + indexw( jee ) = indexw( je ) + iblock( jee ) = iblock( je ) + end do + m = m-im+1 + end if + if( idiscl>0 .or. idiscu>0 ) then + ! code to deal with effects of bad arithmetic. (if n(w) is + ! monotone non-decreasing, this should never happen.) + ! some low eigenvalues to be discarded are not in (wl,wlu], + ! or high eigenvalues to be discarded are not in (wul,wu] + ! so just kill off the smallest idiscl/largest idiscu + ! eigenvalues, by marking the corresponding iblock = 0 + if( idiscl>0 ) then + wkill = wu + do jdisc = 1, idiscl + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )0 ) then + wkill = wl + do jdisc = 1, idiscu + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )>=wkill .or. iw==0 ) ) then + iw = je + wkill = w( je ) + end if + end do + iblock( iw ) = 0 + end do + end if + ! now erase all eigenvalues with iblock set to zero + im = 0 + do je = 1, m + if( iblock( je )/=0 ) then + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl<0 .or. idiscu<0 ) then + toofew = .true. + end if + end if + if(( irange==allrng .and. m/=n ).or.( irange==indrng .and. m/=iu-il+1 ) ) then + toofew = .true. + end if + ! if order='b', do nothing the eigenvalues are already sorted by + ! block. + ! if order='e', sort the eigenvalues from smallest to largest + if( stdlib_lsame(order,'E') .and. nsplit>1 ) then + do je = 1, m - 1 + ie = 0 + tmp1 = w( je ) + do j = je + 1, m + if( w( j ) Given the initial eigenvalue approximations of T, DLARRJ: + !> does bisection to refine the eigenvalues of T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses in WERR. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_dlarrj( n, d, e2, ifirst, ilast,rtol, offset, w, werr, work, iwork,& + pivmin, spdiam, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset + integer(ilp), intent(out) :: info + real(dp), intent(in) :: pivmin, rtol, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: d(*), e2(*) + real(dp), intent(inout) :: w(*), werr(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: cnt, i, i1, i2, ii, iter, j, k, next, nint, olnint, p, prev, & + savi1 + real(dp) :: dplus, fac, left, mid, right, s, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + i2 = ilast + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + loop_75: do i = i1, i2 + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + mid = w(ii) + right = w( ii ) + werr( ii ) + width = right - mid + tmp = max( abs( left ), abs( right ) ) + ! the following test prevents the test of converged intervals + if( width=i1).and.(i<=i2)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + ! make sure that [left,right] contains the desired eigenvalue + ! do while( cnt(left)>i-1 ) + fac = one + 20 continue + cnt = 0 + s = left + dplus = d( 1 ) - s + if( dplusi-1 ) then + left = left - werr( ii )*fac + fac = two*fac + go to 20 + end if + ! do while( cnt(right)0 ), i.e. there are still unconverged intervals + ! and while (iter=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + cnt = 0 + s = mid + dplus = d( 1 ) - s + if( dplus0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = savi1, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + return + end subroutine stdlib_dlarrj + + !> DLARRK: computes one eigenvalue of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_dlarrk( n, iw, gl, gu,d, e2, pivmin, reltol, w, werr, info) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: iw, n + real(dp), intent(in) :: pivmin, reltol, gl, gu + real(dp), intent(out) :: w, werr + ! Array Arguments + real(dp), intent(in) :: d(*), e2(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: fudge = two + + ! Local Scalars + integer(ilp) :: i, it, itmax, negcnt + real(dp) :: atoli, eps, left, mid, right, rtoli, tmp1, tmp2, tnorm + ! Intrinsic Functions + intrinsic :: abs,int,log,max + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! get machine constants + eps = stdlib_dlamch( 'P' ) + tnorm = max( abs( gl ), abs( gu ) ) + rtoli = reltol + atoli = fudge*two*pivmin + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + info = -1 + left = gl - fudge*tnorm*eps*n - fudge*two*pivmin + right = gu + fudge*tnorm*eps*n + fudge*two*pivmin + it = 0 + 10 continue + ! check if interval converged or maximum number of iterations reached + tmp1 = abs( right - left ) + tmp2 = max( abs(right), abs(left) ) + if( tmp1itmax)goto 30 + ! count number of negative pivots for mid-point + it = it + 1 + mid = half * (left + right) + negcnt = 0 + tmp1 = d( 1 ) - mid + if( abs( tmp1 )=iw) then + right = mid + else + left = mid + endif + goto 10 + 30 continue + ! converged or maximum number of iterations reached + w = half * (left + right) + werr = half * abs( right - left ) + return + end subroutine stdlib_dlarrk + + !> Perform tests to decide whether the symmetric tridiagonal matrix T + !> warrants expensive computations which guarantee high relative accuracy + !> in the eigenvalues. + + pure subroutine stdlib_dlarrr( n, d, e, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: d(*) + real(dp), intent(inout) :: e(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: relcond = 0.999_dp + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: yesrel + real(dp) :: eps, safmin, smlnum, rmin, tmp, tmp2, offdig, offdig2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! as a default, do not go for relative-accuracy preserving computations. + info = 1 + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + rmin = sqrt( smlnum ) + ! tests for relative accuracy + ! test for scaled diagonal dominance + ! scale the diagonal entries to one and check whether the sum of the + ! off-diagonals is less than one + ! the sdd relative error bounds have a 1/(1- 2*x) factor in them, + ! x = max(offdig + offdig2), so when x is close to 1/2, no relative + ! accuracy is promised. in the notation of the code fragment below, + ! 1/(1 - (offdig + offdig2)) is the condition number. + ! we don't think it is worth going into "sdd mode" unless the relative + ! condition number is reasonable, not 1/macheps. + ! the threshold should be compatible with other thresholds used in the + ! code. we set offdig + offdig2 <= .999_dp =: relcond, it corresponds + ! to losing at most 3 decimal digits: 1 / (1 - (offdig + offdig2)) <= 1000 + ! instead of the current offdig + offdig2 < 1 + yesrel = .true. + offdig = zero + tmp = sqrt(abs(d(1))) + if (tmp=relcond) yesrel = .false. + if(.not.yesrel) goto 11 + tmp = tmp2 + offdig = offdig2 + end do + 11 continue + if( yesrel ) then + info = 0 + return + else + endif + ! *** more to be implemented *** + ! test if the lower bidiagonal matrix l from t = l d l^t + ! (zero shift facto) is well conditioned + ! test if the upper bidiagonal matrix u from t = u d u^t + ! (zero shift facto) is well conditioned. + ! in this case, the matrix needs to be flipped and, at the end + ! of the eigenvector computation, the flip needs to be applied + ! to the computed eigenvectors (and the support) + return + end subroutine stdlib_dlarrr + + !> ! + !> + !> DLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -S C ] [ G ] [ 0 ] + !> where C**2 + S**2 = 1. + !> The mathematical formulas used for C and S are + !> R = sign(F) * sqrt(F**2 + G**2) + !> C = F / R + !> S = G / R + !> Hence C >= 0. The algorithm used to compute these quantities + !> incorporates scaling to avoid overflow or underflow in computing the + !> square root of the sum of squares. + !> This version is discontinuous in R at F = 0 but it returns the same + !> C and S as ZLARTG for complex inputs (F,0) and (G,0). + !> This is a more accurate version of the BLAS1 routine DROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0 and (G .ne. 0), then C=0 and S=sign(1,G) without doing any + !> floating point operations (saves work in DBDSQR when + !> there are zeros on the diagonal). + !> If F exceeds G in magnitude, C will be positive. + !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. + + pure subroutine stdlib_dlartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(dp), intent(out) :: c, r, s + real(dp), intent(in) :: f, g + ! Local Scalars + real(dp) :: d, f1, fs, g1, gs, p, u, uu + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + f1 = abs( f ) + g1 = abs( g ) + if( g == zero ) then + c = one + s = zero + r = f + else if( f == zero ) then + c = zero + s = sign( one, g ) + r = g1 + else if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) & + then + d = sqrt( f*f + g*g ) + p = one / d + c = f1*p + s = g*sign( p, f ) + r = sign( d, f ) + else + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + fs = f*uu + gs = g*uu + d = sqrt( fs*fs + gs*gs ) + p = one / d + c = abs( fs )*p + s = gs*sign( p, f ) + r = sign( d, f )*u + end if + return + end subroutine stdlib_dlartg + + !> DLARTGP: generates a plane rotation so that + !> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + !> [ -SN CS ] [ G ] [ 0 ] + !> This is a slower, more accurate version of the Level 1 BLAS routine DROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then CS=(+/-)1 and SN=0. + !> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. + !> The sign is chosen so that R >= 0. + + pure subroutine stdlib_dlartgp( f, g, cs, sn, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(out) :: cs, r, sn + real(dp), intent(in) :: f, g + ! ===================================================================== + + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i + real(dp) :: eps, f1, g1, safmin, safmn2, safmx2, scale + ! Intrinsic Functions + intrinsic :: abs,int,log,max,sign,sqrt + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Executable Statements + ! if( first ) then + safmin = stdlib_dlamch( 'S' ) + eps = stdlib_dlamch( 'E' ) + safmn2 = stdlib_dlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_dlamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! first = .false. + ! end if + if( g==zero ) then + cs = sign( one, f ) + sn = zero + r = abs( f ) + else if( f==zero ) then + cs = zero + sn = sign( one, g ) + r = abs( g ) + else + f1 = f + g1 = g + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 ) then + count = 0 + 10 continue + count = count + 1 + f1 = f1*safmn2 + g1 = g1*safmn2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 .and. count < 20 )go to 10 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmx2 + end do + else if( scale<=safmn2 ) then + count = 0 + 30 continue + count = count + 1 + f1 = f1*safmx2 + g1 = g1*safmx2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale<=safmn2 )go to 30 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmn2 + end do + else + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + end if + if( r DLARTGS: generates a plane rotation designed to introduce a bulge in + !> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD + !> problem. X and Y are the top-row entries, and SIGMA is the shift. + !> The computed CS and SN define a plane rotation satisfying + !> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], + !> [ -SN CS ] [ X * Y ] [ 0 ] + !> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the + !> rotation is by PI/2. + + pure subroutine stdlib_dlartgs( x, y, sigma, cs, sn ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(out) :: cs, sn + real(dp), intent(in) :: sigma, x, y + ! =================================================================== + + ! Local Scalars + real(dp) :: r, s, thresh, w, z + thresh = stdlib_dlamch('E') + ! compute the first column of b**t*b - sigma^2*i, up to a scale + ! factor. + if( (sigma == zero .and. abs(x) < thresh) .or.(abs(x) == sigma .and. y == zero) ) & + then + z = zero + w = zero + else if( sigma == zero ) then + if( x >= zero ) then + z = x + w = y + else + z = -x + w = -y + end if + else if( abs(x) < thresh ) then + z = -sigma*sigma + w = zero + else + if( x >= zero ) then + s = one + else + s = negone + end if + z = s * (abs(x)-sigma) * (s+sigma/x) + w = s * y + end if + ! generate the rotation. + ! call stdlib_dlartgp( z, w, cs, sn, r ) might seem more natural; + ! reordering the arguments ensures that if z = 0 then the rotation + ! is by pi/2. + call stdlib_dlartgp( w, z, sn, cs, r ) + return + ! end stdlib_dlartgs + end subroutine stdlib_dlartgs + + !> DLARTV: applies a vector of real plane rotations to elements of the + !> real vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -s(i) c(i) ) ( y(i) ) + + pure subroutine stdlib_dlartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(dp), intent(in) :: c(*), s(*) + real(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(dp) :: xi, yi + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - s( ic )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_dlartv + + !> DLARUV: returns a vector of n random real numbers from a uniform (0,1) + !> distribution (n <= 128). + !> This is an auxiliary routine called by DLARNV and ZLARNV. + + pure subroutine stdlib_dlaruv( iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(dp), intent(out) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + integer(ilp), parameter :: ipw2 = 4096 + real(dp), parameter :: r = one/ipw2 + + + + ! Local Scalars + integer(ilp) :: i, i1, i2, i3, i4, it1, it2, it3, it4, j + ! Local Arrays + integer(ilp) :: mm(lv,4) + ! Intrinsic Functions + intrinsic :: real,min,mod + ! Data Statements + mm(1,1:4)=[494,322,2508,2549] + mm(2,1:4)=[2637,789,3754,1145] + mm(3,1:4)=[255,1440,1766,2253] + mm(4,1:4)=[2008,752,3572,305] + mm(5,1:4)=[1253,2859,2893,3301] + mm(6,1:4)=[3344,123,307,1065] + mm(7,1:4)=[4084,1848,1297,3133] + mm(8,1:4)=[1739,643,3966,2913] + mm(9,1:4)=[3143,2405,758,3285] + mm(10,1:4)=[3468,2638,2598,1241] + mm(11,1:4)=[688,2344,3406,1197] + mm(12,1:4)=[1657,46,2922,3729] + mm(13,1:4)=[1238,3814,1038,2501] + mm(14,1:4)=[3166,913,2934,1673] + mm(15,1:4)=[1292,3649,2091,541] + mm(16,1:4)=[3422,339,2451,2753] + mm(17,1:4)=[1270,3808,1580,949] + mm(18,1:4)=[2016,822,1958,2361] + mm(19,1:4)=[154,2832,2055,1165] + mm(20,1:4)=[2862,3078,1507,4081] + mm(21,1:4)=[697,3633,1078,2725] + mm(22,1:4)=[1706,2970,3273,3305] + mm(23,1:4)=[491,637,17,3069] + mm(24,1:4)=[931,2249,854,3617] + mm(25,1:4)=[1444,2081,2916,3733] + mm(26,1:4)=[444,4019,3971,409] + mm(27,1:4)=[3577,1478,2889,2157] + mm(28,1:4)=[3944,242,3831,1361] + mm(29,1:4)=[2184,481,2621,3973] + mm(30,1:4)=[1661,2075,1541,1865] + mm(31,1:4)=[3482,4058,893,2525] + mm(32,1:4)=[657,622,736,1409] + mm(33,1:4)=[3023,3376,3992,3445] + mm(34,1:4)=[3618,812,787,3577] + mm(35,1:4)=[1267,234,2125,77] + mm(36,1:4)=[1828,641,2364,3761] + mm(37,1:4)=[164,4005,2460,2149] + mm(38,1:4)=[3798,1122,257,1449] + mm(39,1:4)=[3087,3135,1574,3005] + mm(40,1:4)=[2400,2640,3912,225] + mm(41,1:4)=[2870,2302,1216,85] + mm(42,1:4)=[3876,40,3248,3673] + mm(43,1:4)=[1905,1832,3401,3117] + mm(44,1:4)=[1593,2247,2124,3089] + mm(45,1:4)=[1797,2034,2762,1349] + mm(46,1:4)=[1234,2637,149,2057] + mm(47,1:4)=[3460,1287,2245,413] + mm(48,1:4)=[328,1691,166,65] + mm(49,1:4)=[2861,496,466,1845] + mm(50,1:4)=[1950,1597,4018,697] + mm(51,1:4)=[617,2394,1399,3085] + mm(52,1:4)=[2070,2584,190,3441] + mm(53,1:4)=[3331,1843,2879,1573] + mm(54,1:4)=[769,336,153,3689] + mm(55,1:4)=[1558,1472,2320,2941] + mm(56,1:4)=[2412,2407,18,929] + mm(57,1:4)=[2800,433,712,533] + mm(58,1:4)=[189,2096,2159,2841] + mm(59,1:4)=[287,1761,2318,4077] + mm(60,1:4)=[2045,2810,2091,721] + mm(61,1:4)=[1227,566,3443,2821] + mm(62,1:4)=[2838,442,1510,2249] + mm(63,1:4)=[209,41,449,2397] + mm(64,1:4)=[2770,1238,1956,2817] + mm(65,1:4)=[3654,1086,2201,245] + mm(66,1:4)=[3993,603,3137,1913] + mm(67,1:4)=[192,840,3399,1997] + mm(68,1:4)=[2253,3168,1321,3121] + mm(69,1:4)=[3491,1499,2271,997] + mm(70,1:4)=[2889,1084,3667,1833] + mm(71,1:4)=[2857,3438,2703,2877] + mm(72,1:4)=[2094,2408,629,1633] + mm(73,1:4)=[1818,1589,2365,981] + mm(74,1:4)=[688,2391,2431,2009] + mm(75,1:4)=[1407,288,1113,941] + mm(76,1:4)=[634,26,3922,2449] + mm(77,1:4)=[3231,512,2554,197] + mm(78,1:4)=[815,1456,184,2441] + mm(79,1:4)=[3524,171,2099,285] + mm(80,1:4)=[1914,1677,3228,1473] + mm(81,1:4)=[516,2657,4012,2741] + mm(82,1:4)=[164,2270,1921,3129] + mm(83,1:4)=[303,2587,3452,909] + mm(84,1:4)=[2144,2961,3901,2801] + mm(85,1:4)=[3480,1970,572,421] + mm(86,1:4)=[119,1817,3309,4073] + mm(87,1:4)=[3357,676,3171,2813] + mm(88,1:4)=[837,1410,817,2337] + mm(89,1:4)=[2826,3723,3039,1429] + mm(90,1:4)=[2332,2803,1696,1177] + mm(91,1:4)=[2089,3185,1256,1901] + mm(92,1:4)=[3780,184,3715,81] + mm(93,1:4)=[1700,663,2077,1669] + mm(94,1:4)=[3712,499,3019,2633] + mm(95,1:4)=[150,3784,1497,2269] + mm(96,1:4)=[2000,1631,1101,129] + mm(97,1:4)=[3375,1925,717,1141] + mm(98,1:4)=[1621,3912,51,249] + mm(99,1:4)=[3090,1398,981,3917] + mm(100,1:4)=[3765,1349,1978,2481] + mm(101,1:4)=[1149,1441,1813,3941] + mm(102,1:4)=[3146,2224,3881,2217] + mm(103,1:4)=[33,2411,76,2749] + mm(104,1:4)=[3082,1907,3846,3041] + mm(105,1:4)=[2741,3192,3694,1877] + mm(106,1:4)=[359,2786,1682,345] + mm(107,1:4)=[3316,382,124,2861] + mm(108,1:4)=[1749,37,1660,1809] + mm(109,1:4)=[185,759,3997,3141] + mm(110,1:4)=[2784,2948,479,2825] + mm(111,1:4)=[2202,1862,1141,157] + mm(112,1:4)=[2199,3802,886,2881] + mm(113,1:4)=[1364,2423,3514,3637] + mm(114,1:4)=[1244,2051,1301,1465] + mm(115,1:4)=[2020,2295,3604,2829] + mm(116,1:4)=[3160,1332,1888,2161] + mm(117,1:4)=[2785,1832,1836,3365] + mm(118,1:4)=[2772,2405,1990,361] + mm(119,1:4)=[1217,3638,2058,2685] + mm(120,1:4)=[1822,3661,692,3745] + mm(121,1:4)=[1245,327,1194,2325] + mm(122,1:4)=[2252,3660,20,3609] + mm(123,1:4)=[3904,716,3285,3821] + mm(124,1:4)=[2774,1842,2046,3537] + mm(125,1:4)=[997,3987,2107,517] + mm(126,1:4)=[2573,1368,3508,3017] + mm(127,1:4)=[1148,1848,3525,2141] + mm(128,1:4)=[545,2366,3801,1537] + ! Executable Statements + i1 = iseed( 1 ) + i2 = iseed( 2 ) + i3 = iseed( 3 ) + i4 = iseed( 4 ) + loop_10: do i = 1, min( n, lv ) + 20 continue + ! multiply the seed by i-th power of the multiplier modulo 2**48 + it4 = i4*mm( i, 4 ) + it3 = it4 / ipw2 + it4 = it4 - ipw2*it3 + it3 = it3 + i3*mm( i, 4 ) + i4*mm( i, 3 ) + it2 = it3 / ipw2 + it3 = it3 - ipw2*it2 + it2 = it2 + i2*mm( i, 4 ) + i3*mm( i, 3 ) + i4*mm( i, 2 ) + it1 = it2 / ipw2 + it2 = it2 - ipw2*it1 + it1 = it1 + i1*mm( i, 4 ) + i2*mm( i, 3 ) + i3*mm( i, 2 ) +i4*mm( i, 1 ) + it1 = mod( it1, ipw2 ) + ! convert 48-bit integer to a realnumber in the interval (0,1,KIND=dp) + x( i ) = r*( real( it1,KIND=dp)+r*( real( it2,KIND=dp)+r*( real( it3,KIND=dp)+& + r*real( it4,KIND=dp) ) ) ) + if (x( i )==1.0_dp) then + ! if a real number has n bits of precision, and the first + ! n bits of the 48-bit integer above happen to be all 1 (which + ! will occur about once every 2**n calls), then x( i ) will + ! be rounded to exactly one. + ! since x( i ) is not supposed to return exactly 0.0_dp or 1.0_dp, + ! the statistically correct thing to do in this situation is + ! simply to iterate again. + ! n.b. the case x( i ) = 0.0_dp should not be possible. + i1 = i1 + 2 + i2 = i2 + 2 + i3 = i3 + 2 + i4 = i4 + 2 + goto 20 + end if + end do loop_10 + ! return final value of seed + iseed( 1 ) = it1 + iseed( 2 ) = it2 + iseed( 3 ) = it3 + iseed( 4 ) = it4 + return + end subroutine stdlib_dlaruv + + !> DLARZ: applies a real elementary reflector H to a real M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> H is a product of k elementary reflectors as returned by DTZRZF. + + pure subroutine stdlib_dlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + real(dp), intent(in) :: tau + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(in) :: v(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=zero ) then + ! w( 1:n ) = c( 1, 1:n ) + call stdlib_dcopy( n, c, ldc, work, 1 ) + ! w( 1:n ) = w( 1:n ) + c( m-l+1:m, 1:n )**t * v( 1:l ) + call stdlib_dgemv( 'TRANSPOSE', l, n, one, c( m-l+1, 1 ), ldc, v,incv, one, work,& + 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_daxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**t + call stdlib_dger( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=zero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_dcopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_dgemv( 'NO TRANSPOSE', m, l, one, c( 1, n-l+1 ), ldc,v, incv, one, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_daxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**t + call stdlib_dger( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_dlarz + + !> DLARZB: applies a real block reflector H or its transpose H**T to + !> a real distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_dlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**t + do j = 1, k + call stdlib_dcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**t * v( 1:k, 1:l )**t + if( l>0 )call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, l, one,c( m-l+1, 1 ), & + ldc, v, ldv, one, work, ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_dtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, one, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**t + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**t * w( 1:n, 1:k )**t + if( l>0 )call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -one, v, ldv,work, & + ldwork, one, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_dcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**t + if( l>0 )call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, one,c( 1, n-l+1 ),& + ldc, v, ldv, one, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * t or w( 1:m, 1:k ) * t**t + call stdlib_dtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, one, t,ldt, work, & + ldwork ) + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * v( 1:k, 1:l ) + if( l>0 )call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -one,work, & + ldwork, v, ldv, one, c( 1, n-l+1 ), ldc ) + end if + return + end subroutine stdlib_dlarzb + + !> DLARZT: forms the triangular factor T of a real block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_dlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(dp), intent(out) :: t(ldt,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i DLAS2: computes the singular values of the 2-by-2 matrix + !> [ F G ] + !> [ 0 H ]. + !> On return, SSMIN is the smaller singular value and SSMAX is the + !> larger singular value. + + pure subroutine stdlib_dlas2( f, g, h, ssmin, ssmax ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: f, g, h + real(dp), intent(out) :: ssmax, ssmin + ! ==================================================================== + + + + ! Local Scalars + real(dp) :: as, at, au, c, fa, fhmn, fhmx, ga, ha + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + fa = abs( f ) + ga = abs( g ) + ha = abs( h ) + fhmn = min( fa, ha ) + fhmx = max( fa, ha ) + if( fhmn==zero ) then + ssmin = zero + if( fhmx==zero ) then + ssmax = ga + else + ssmax = max( fhmx, ga )*sqrt( one+( min( fhmx, ga ) / max( fhmx, ga ) )**2 ) + + end if + else + if( ga This subroutine computes the square root of the I-th eigenvalue + !> of a positive symmetric rank-one modification of a 2-by-2 diagonal + !> matrix + !> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal entries in the array D are assumed to satisfy + !> 0 <= D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_dlasd5( i, d, z, delta, rho, dsigma, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(dp), intent(out) :: dsigma + real(dp), intent(in) :: rho + ! Array Arguments + real(dp), intent(in) :: d(2), z(2) + real(dp), intent(out) :: delta(2), work(2) + ! ===================================================================== + + ! Local Scalars + real(dp) :: b, c, del, delsq, tau, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + delsq = del*( d( 2 )+d( 1 ) ) + if( i==1 ) then + w = one + four*rho*( z( 2 )*z( 2 ) / ( d( 1 )+three*d( 2 ) )-z( 1 )*z( 1 ) / ( & + three*d( 1 )+d( 2 ) ) ) / del + if( w>zero ) then + b = delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*delsq + ! b > zero, always + ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + ! the following tau is dsigma - d( 1 ) + tau = tau / ( d( 1 )+sqrt( d( 1 )*d( 1 )+tau ) ) + dsigma = d( 1 ) + tau + delta( 1 ) = -tau + delta( 2 ) = del - tau + work( 1 ) = two*d( 1 ) + tau + work( 2 ) = ( d( 1 )+tau ) + d( 2 ) + ! delta( 1 ) = -z( 1 ) / tau + ! delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( abs( d( 2 )*d( 2 )+tau ) ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + end if + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( d( 2 )*d( 2 )+tau ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_dlasd5 + + !> DLASDT: creates a tree of subproblems for bidiagonal divide and + !> conquer. + + pure subroutine stdlib_dlasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: lvl, nd + integer(ilp), intent(in) :: msub, n + ! Array Arguments + integer(ilp), intent(out) :: inode(*), ndiml(*), ndimr(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, il, ir, llst, maxn, ncrnt, nlvl + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: real,int,log,max + ! Executable Statements + ! find the number of levels on the tree. + maxn = max( 1, n ) + temp = log( real( maxn,KIND=dp) / real( msub+1,KIND=dp) ) / log( two ) + lvl = int( temp,KIND=ilp) + 1 + i = n / 2 + inode( 1 ) = i + 1 + ndiml( 1 ) = i + ndimr( 1 ) = n - i - 1 + il = 0 + ir = 1 + llst = 1 + do nlvl = 1, lvl - 1 + ! constructing the tree at (nlvl+1)-st level. the number of + ! nodes created on this level is llst * 2. + do i = 0, llst - 1 + il = il + 2 + ir = ir + 2 + ncrnt = llst + i + ndiml( il ) = ndiml( ncrnt ) / 2 + ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1 + inode( il ) = inode( ncrnt ) - ndimr( il ) - 1 + ndiml( ir ) = ndimr( ncrnt ) / 2 + ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1 + inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1 + end do + llst = llst*2 + end do + nd = llst*2 - 1 + return + end subroutine stdlib_dlasdt + + !> DLASET: initializes an m-by-n matrix A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_dlaset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(in) :: alpha, beta + ! Array Arguments + real(dp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the strictly upper triangular or trapezoidal part of the + ! array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the strictly lower triangular or trapezoidal part of the + ! array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + else + ! set the leading m-by-n submatrix to alpha. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + end if + ! set the first min(m,n) diagonal elements to beta. + do i = 1, min( m, n ) + a( i, i ) = beta + end do + return + end subroutine stdlib_dlaset + + !> DLASQ4: computes an approximation TAU to the smallest eigenvalue + !> using values of d from the previous transform. + + pure subroutine stdlib_dlasq4( i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn,dn1, dn2, tau, & + ttype, g ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i0, n0, n0in, pp + integer(ilp), intent(out) :: ttype + real(dp), intent(in) :: dmin, dmin1, dmin2, dn, dn1, dn2 + real(dp), intent(inout) :: g + real(dp), intent(out) :: tau + ! Array Arguments + real(dp), intent(in) :: z(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: cnst1 = 0.5630_dp + real(dp), parameter :: cnst2 = 1.010_dp + real(dp), parameter :: cnst3 = 1.050_dp + real(dp), parameter :: qurtr = 0.250_dp + real(dp), parameter :: third = 0.3330_dp + real(dp), parameter :: hundrd = 100.0_dp + + + ! Local Scalars + integer(ilp) :: i4, nn, np + real(dp) :: a2, b1, b2, gam, gap1, gap2, s + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! a negative dmin forces the shift to take that absolute value + ! ttype records the type of shift. + if( dmin<=zero ) then + tau = -dmin + ttype = -1 + return + end if + nn = 4*n0 + pp + if( n0in==n0 ) then + ! no eigenvalues deflated. + if( dmin==dn .or. dmin==dn1 ) then + b1 = sqrt( z( nn-3 ) )*sqrt( z( nn-5 ) ) + b2 = sqrt( z( nn-7 ) )*sqrt( z( nn-9 ) ) + a2 = z( nn-7 ) + z( nn-5 ) + ! cases 2 and 3. + if( dmin==dn .and. dmin1==dn1 ) then + gap2 = dmin2 - a2 - dmin2*qurtr + if( gap2>zero .and. gap2>b2 ) then + gap1 = a2 - dn - ( b2 / gap2 )*b2 + else + gap1 = a2 - dn - ( b1+b2 ) + end if + if( gap1>zero .and. gap1>b1 ) then + s = max( dn-( b1 / gap1 )*b1, half*dmin ) + ttype = -2 + else + s = zero + if( dn>b1 )s = dn - b1 + if( a2>( b1+b2 ) )s = min( s, a2-( b1+b2 ) ) + s = max( s, third*dmin ) + ttype = -3 + end if + else + ! case 4. + ttype = -4 + s = qurtr*dmin + if( dmin==dn ) then + gam = dn + a2 = zero + if( z( nn-5 ) > z( nn-7 ) )return + b2 = z( nn-5 ) / z( nn-7 ) + np = nn - 9 + else + np = nn - 2*pp + gam = dn1 + if( z( np-4 ) > z( np-2 ) )return + a2 = z( np-4 ) / z( np-2 ) + if( z( nn-9 ) > z( nn-11 ) )return + b2 = z( nn-9 ) / z( nn-11 ) + np = nn - 13 + end if + ! approximate contribution to norm squared from i < nn-1. + a2 = a2 + b2 + do i4 = np, 4*i0 - 1 + pp, -4 + if( b2==zero )go to 20 + b1 = b2 + if( z( i4 ) > z( i4-2 ) )return + b2 = b2*( z( i4 ) / z( i4-2 ) ) + a2 = a2 + b2 + if( hundrd*max( b2, b1 ) nn-2. + np = nn - 2*pp + b1 = z( np-2 ) + b2 = z( np-6 ) + gam = dn2 + if( z( np-8 )>b2 .or. z( np-4 )>b1 )return + a2 = ( z( np-8 ) / b2 )*( one+z( np-4 ) / b1 ) + ! approximate contribution to norm squared from i < nn-2. + if( n0-i0>2 ) then + b2 = z( nn-13 ) / z( nn-15 ) + a2 = a2 + b2 + do i4 = nn - 17, 4*i0 - 1 + pp, -4 + if( b2==zero )go to 40 + b1 = b2 + if( z( i4 ) > z( i4-2 ) )return + b2 = b2*( z( i4 ) / z( i4-2 ) ) + a2 = a2 + b2 + if( hundrd*max( b2, b1 )z( nn-7 ) )return + b1 = z( nn-5 ) / z( nn-7 ) + b2 = b1 + if( b2==zero )go to 60 + do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4 + a2 = b1 + if( z( i4 )>z( i4-2 ) )return + b1 = b1*( z( i4 ) / z( i4-2 ) ) + b2 = b2 + b1 + if( hundrd*max( b1, a2 )zero .and. gap2>b2*a2 ) then + s = max( s, a2*( one-cnst2*a2*( b2 / gap2 )*b2 ) ) + else + s = max( s, a2*( one-cnst2*b2 ) ) + ttype = -8 + end if + else + ! case 9. + s = qurtr*dmin1 + if( dmin1==dn1 )s = half*dmin1 + ttype = -9 + end if + else if( n0in==( n0+2 ) ) then + ! two eigenvalues deflated. use dmin2, dn2 for dmin and dn. + ! cases 10 and 11. + if( dmin2==dn2 .and. two*z( nn-5 )z( nn-7 ) )return + b1 = z( nn-5 ) / z( nn-7 ) + b2 = b1 + if( b2==zero )go to 80 + do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4 + if( z( i4 )>z( i4-2 ) )return + b1 = b1*( z( i4 ) / z( i4-2 ) ) + b2 = b2 + b1 + if( hundrd*b1zero .and. gap2>b2*a2 ) then + s = max( s, a2*( one-cnst2*a2*( b2 / gap2 )*b2 ) ) + else + s = max( s, a2*( one-cnst2*b2 ) ) + end if + else + s = qurtr*dmin2 + ttype = -11 + end if + else if( n0in>( n0+2 ) ) then + ! case 12, more than two eigenvalues deflated. no information. + s = zero + ttype = -12 + end if + tau = s + return + end subroutine stdlib_dlasq4 + + !> DLASQ5: computes one dqds transform in ping-pong form, one + !> version for IEEE machines another for non IEEE machines. + + pure subroutine stdlib_dlasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2,dn, dnm1, dnm2, & + ieee, eps ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0, n0, pp + real(dp), intent(out) :: dmin, dmin1, dmin2, dn, dnm1, dnm2 + real(dp), intent(inout) :: tau + real(dp), intent(in) :: sigma, eps + ! Array Arguments + real(dp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j4, j4p2 + real(dp) :: d, emin, temp, dthresh + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( ( n0-i0-1 )<=0 )return + dthresh = eps*(sigma+tau) + if( tau DLASQ6: computes one dqd (shift equal to zero) transform in + !> ping-pong form, with protection against underflow and overflow. + + pure subroutine stdlib_dlasq6( i0, n0, z, pp, dmin, dmin1, dmin2, dn,dnm1, dnm2 ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i0, n0, pp + real(dp), intent(out) :: dmin, dmin1, dmin2, dn, dnm1, dnm2 + ! Array Arguments + real(dp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j4, j4p2 + real(dp) :: d, emin, safmin, temp + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( ( n0-i0-1 )<=0 )return + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + j4 = 4*i0 + pp - 3 + emin = z( j4+4 ) + d = z( j4 ) + dmin = d + if( pp==0 ) then + do j4 = 4*i0, 4*( n0-3 ), 4 + z( j4-2 ) = d + z( j4-1 ) + if( z( j4-2 )==zero ) then + z( j4 ) = zero + d = z( j4+1 ) + dmin = d + emin = zero + else if( safmin*z( j4+1 ) DLASR: applies a sequence of plane rotations to a real matrix A, + !> from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + + pure subroutine stdlib_dlasr( side, pivot, direct, m, n, c, s, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, pivot, side + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: c(*), s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + real(dp) :: ctemp, stemp, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.( stdlib_lsame( side, 'L' ) .or. stdlib_lsame( side, 'R' ) ) ) then + info = 1 + else if( .not.( stdlib_lsame( pivot, 'V' ) .or. stdlib_lsame( pivot,'T' ) .or. & + stdlib_lsame( pivot, 'B' ) ) ) then + info = 2 + else if( .not.( stdlib_lsame( direct, 'F' ) .or. stdlib_lsame( direct, 'B' ) ) )& + then + info = 3 + else if( m<0 ) then + info = 4 + else if( n<0 ) then + info = 5 + else if( lda Sort the numbers in D in increasing order (if ID = 'I') or + !> in decreasing order (if ID = 'D' ). + !> Use Quick Sort, reverting to Insertion sort on arrays of + !> size <= 20. Dimension of STACK limits N to about 2**32. + + pure subroutine stdlib_dlasrt( id, n, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: id + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: d(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: select = 20 + + ! Local Scalars + integer(ilp) :: dir, endd, i, j, start, stkpnt + real(dp) :: d1, d2, d3, dmnmx, tmp + ! Local Arrays + integer(ilp) :: stack(2,32) + ! Executable Statements + ! test the input parameters. + info = 0 + dir = -1 + if( stdlib_lsame( id, 'D' ) ) then + dir = 0 + else if( stdlib_lsame( id, 'I' ) ) then + dir = 1 + end if + if( dir==-1 ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLASRT', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + stkpnt = 1 + stack( 1, 1 ) = 1 + stack( 2, 1 ) = n + 10 continue + start = stack( 1, stkpnt ) + endd = stack( 2, stkpnt ) + stkpnt = stkpnt - 1 + if( endd-start<=select .and. endd-start>0 ) then + ! do insertion sort on d( start:endd ) + if( dir==0 ) then + ! sort into decreasing order + loop_30: do i = start + 1, endd + do j = i, start + 1, -1 + if( d( j )>d( j-1 ) ) then + dmnmx = d( j ) + d( j ) = d( j-1 ) + d( j-1 ) = dmnmx + else + cycle loop_30 + end if + end do + end do loop_30 + else + ! sort into increasing order + loop_50: do i = start + 1, endd + do j = i, start + 1, -1 + if( d( j )select ) then + ! partition d( start:endd ) and stack parts, largest one first + ! choose partition entry as median of 3 + d1 = d( start ) + d2 = d( endd ) + i = ( start+endd ) / 2 + d3 = d( i ) + if( d1dmnmx )go to 80 + if( iendd-j-1 ) then + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + else + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + end if + else + ! sort into increasing order + i = start - 1 + j = endd + 1 + 90 continue + 100 continue + j = j - 1 + if( d( j )>dmnmx )go to 100 + 110 continue + i = i + 1 + if( d( i )endd-j-1 ) then + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + else + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + end if + end if + end if + if( stkpnt>0 )go to 10 + return + end subroutine stdlib_dlasrt + + !> ! + !> + !> DLASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + + pure subroutine stdlib_dlassq( n, x, incx, scl, sumsq ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(inout) :: scl, sumsq + ! Array Arguments + real(dp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(dp) :: abig, amed, asml, ax, ymax, ymin + ! quick return if possible + if( ieee_is_nan(scl) .or. ieee_is_nan(sumsq) ) return + if( sumsq == zero ) scl = one + if( scl == zero ) then + scl = one + sumsq = zero + end if + if (n <= 0) then + return + end if + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(x(ix)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! put the existing sum of squares into one of the accumulators + if( sumsq > zero ) then + ax = scl*sqrt( sumsq ) + if (ax > tbig) then + ! we assume scl >= sqrt( tiny*eps ) / sbig + abig = abig + (scl*sbig)**2 * sumsq + else if (ax < tsml) then + ! we assume scl <= sqrt( huge ) / ssml + if (notbig) asml = asml + (scl*ssml)**2 * sumsq + else + amed = amed + scl**2 * sumsq + end if + end if + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range or zero + scl = one + sumsq = amed + end if + return + end subroutine stdlib_dlassq + + !> DLASV2: computes the singular value decomposition of a 2-by-2 + !> triangular matrix + !> [ F G ] + !> [ 0 H ]. + !> On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + !> smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + !> right singular vectors for abs(SSMAX), giving the decomposition + !> [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + !> [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + pure subroutine stdlib_dlasv2( f, g, h, ssmin, ssmax, snr, csr, snl, csl ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(out) :: csl, csr, snl, snr, ssmax, ssmin + real(dp), intent(in) :: f, g, h + ! ===================================================================== + + + + + + ! Local Scalars + logical(lk) :: gasmal, swap + integer(ilp) :: pmax + real(dp) :: a, clt, crt, d, fa, ft, ga, gt, ha, ht, l, m, mm, r, s, slt, srt, t, temp, & + tsign, tt + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ft = f + fa = abs( ft ) + ht = h + ha = abs( h ) + ! pmax points to the maximum absolute element of matrix + ! pmax = 1 if f largest in absolute values + ! pmax = 2 if g largest in absolute values + ! pmax = 3 if h largest in absolute values + pmax = 1 + swap = ( ha>fa ) + if( swap ) then + pmax = 3 + temp = ft + ft = ht + ht = temp + temp = fa + fa = ha + ha = temp + ! now fa .ge. ha + end if + gt = g + ga = abs( gt ) + if( ga==zero ) then + ! diagonal matrix + ssmin = ha + ssmax = fa + clt = one + crt = one + slt = zero + srt = zero + else + gasmal = .true. + if( ga>fa ) then + pmax = 2 + if( ( fa / ga )one ) then + ssmin = fa / ( ga / ha ) + else + ssmin = ( fa / ga )*ha + end if + clt = one + slt = ht / gt + srt = one + crt = ft / gt + end if + end if + if( gasmal ) then + ! normal case + d = fa - ha + if( d==fa ) then + ! copes with infinite f or h + l = one + else + l = d / fa + end if + ! note that 0 .le. l .le. 1 + m = gt / ft + ! note that abs(m) .le. 1/macheps + t = two - l + ! note that t .ge. 1 + mm = m*m + tt = t*t + s = sqrt( tt+mm ) + ! note that 1 .le. s .le. 1 + 1/macheps + if( l==zero ) then + r = abs( m ) + else + r = sqrt( l*l+mm ) + end if + ! note that 0 .le. r .le. 1 + 1/macheps + a = half*( s+r ) + ! note that 1 .le. a .le. 1 + abs(m) + ssmin = ha / a + ssmax = fa*a + if( mm==zero ) then + ! note that m is very tiny + if( l==zero ) then + t = sign( two, ft )*sign( one, gt ) + else + t = gt / sign( d, ft ) + m / t + end if + else + t = ( m / ( s+t )+m / ( r+l ) )*( one+a ) + end if + l = sqrt( t*t+four ) + crt = two / l + srt = t / l + clt = ( crt+srt*m ) / a + slt = ( ht / ft )*srt / a + end if + end if + if( swap ) then + csl = srt + snl = crt + csr = slt + snr = clt + else + csl = clt + snl = slt + csr = crt + snr = srt + end if + ! correct signs of ssmax and ssmin + if( pmax==1 )tsign = sign( one, csr )*sign( one, csl )*sign( one, f ) + if( pmax==2 )tsign = sign( one, snr )*sign( one, csl )*sign( one, g ) + if( pmax==3 )tsign = sign( one, snr )*sign( one, snl )*sign( one, h ) + ssmax = sign( ssmax, tsign ) + ssmin = sign( ssmin, tsign*sign( one, f )*sign( one, h ) ) + return + end subroutine stdlib_dlasv2 + + !> DLASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + + pure subroutine stdlib_dlaswp( n, a, lda, k1, k2, ipiv, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k1, k2, lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32 + real(dp) :: temp + ! Executable Statements + ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows + ! k1 through k2. + if( incx>0 ) then + ix0 = k1 + i1 = k1 + i2 = k2 + inc = 1 + else if( incx<0 ) then + ix0 = k1 + ( k1-k2 )*incx + i1 = k2 + i2 = k1 + inc = -1 + else + return + end if + n32 = ( n / 32 )*32 + if( n32/=0 ) then + do j = 1, n32, 32 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = j, j + 31 + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end do + end if + if( n32/=n ) then + n32 = n32 + 1 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = n32, n + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end if + return + end subroutine stdlib_dlaswp + + !> DLASY2: solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + !> op(TL)*X + ISGN*X*op(TR) = SCALE*B, + !> where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + !> -1. op(T) = T or T**T, where T**T denotes the transpose of T. + + pure subroutine stdlib_dlasy2( ltranl, ltranr, isgn, n1, n2, tl, ldtl, tr,ldtr, b, ldb, & + scale, x, ldx, xnorm, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ltranl, ltranr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, ldb, ldtl, ldtr, ldx, n1, n2 + real(dp), intent(out) :: scale, xnorm + ! Array Arguments + real(dp), intent(in) :: b(ldb,*), tl(ldtl,*), tr(ldtr,*) + real(dp), intent(out) :: x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: bswap, xswap + integer(ilp) :: i, ip, ipiv, ipsv, j, jp, jpsv, k + real(dp) :: bet, eps, gam, l21, sgn, smin, smlnum, tau1, temp, u11, u12, u22, & + xmax + ! Local Arrays + logical(lk) :: bswpiv(4), xswpiv(4) + integer(ilp) :: jpiv(4), locl21(4), locu12(4), locu22(4) + real(dp) :: btmp(4), t16(4,4), tmp(4), x2(2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Data Statements + locu12 = [3,4,1,2] + locl21 = [2,1,4,3] + locu22 = [4,3,2,1] + xswpiv = [.false.,.false.,.true.,.true.] + bswpiv = [.false.,.true.,.false.,.true.] + ! Executable Statements + ! do not check the input parameters for errors + info = 0 + ! quick return if possible + if( n1==0 .or. n2==0 )return + ! set constants to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + sgn = isgn + k = n1 + n1 + n2 - 2 + go to ( 10, 20, 30, 50 )k + ! 1 by 1: tl11*x + sgn*x*tr11 = b11 + 10 continue + tau1 = tl( 1, 1 ) + sgn*tr( 1, 1 ) + bet = abs( tau1 ) + if( bet<=smlnum ) then + tau1 = smlnum + bet = smlnum + info = 1 + end if + scale = one + gam = abs( b( 1, 1 ) ) + if( smlnum*gam>bet )scale = one / gam + x( 1, 1 ) = ( b( 1, 1 )*scale ) / tau1 + xnorm = abs( x( 1, 1 ) ) + return + ! 1 by 2: + ! tl11*[x11 x12] + isgn*[x11 x12]*op[tr11 tr12] = [b11 b12] + ! [tr21 tr22] + 20 continue + smin = max( eps*max( abs( tl( 1, 1 ) ), abs( tr( 1, 1 ) ),abs( tr( 1, 2 ) ), abs( tr( & + 2, 1 ) ), abs( tr( 2, 2 ) ) ),smlnum ) + tmp( 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + tmp( 4 ) = tl( 1, 1 ) + sgn*tr( 2, 2 ) + if( ltranr ) then + tmp( 2 ) = sgn*tr( 2, 1 ) + tmp( 3 ) = sgn*tr( 1, 2 ) + else + tmp( 2 ) = sgn*tr( 1, 2 ) + tmp( 3 ) = sgn*tr( 2, 1 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 1, 2 ) + go to 40 + ! 2 by 1: + ! op[tl11 tl12]*[x11] + isgn* [x11]*tr11 = [b11] + ! [tl21 tl22] [x21] [x21] [b21] + 30 continue + smin = max( eps*max( abs( tr( 1, 1 ) ), abs( tl( 1, 1 ) ),abs( tl( 1, 2 ) ), abs( tl( & + 2, 1 ) ), abs( tl( 2, 2 ) ) ),smlnum ) + tmp( 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + tmp( 4 ) = tl( 2, 2 ) + sgn*tr( 1, 1 ) + if( ltranl ) then + tmp( 2 ) = tl( 1, 2 ) + tmp( 3 ) = tl( 2, 1 ) + else + tmp( 2 ) = tl( 2, 1 ) + tmp( 3 ) = tl( 1, 2 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 2, 1 ) + 40 continue + ! solve 2 by 2 system using complete pivoting. + ! set pivots less than smin to smin. + ipiv = stdlib_idamax( 4, tmp, 1 ) + u11 = tmp( ipiv ) + if( abs( u11 )<=smin ) then + info = 1 + u11 = smin + end if + u12 = tmp( locu12( ipiv ) ) + l21 = tmp( locl21( ipiv ) ) / u11 + u22 = tmp( locu22( ipiv ) ) - u12*l21 + xswap = xswpiv( ipiv ) + bswap = bswpiv( ipiv ) + if( abs( u22 )<=smin ) then + info = 1 + u22 = smin + end if + if( bswap ) then + temp = btmp( 2 ) + btmp( 2 ) = btmp( 1 ) - l21*temp + btmp( 1 ) = temp + else + btmp( 2 ) = btmp( 2 ) - l21*btmp( 1 ) + end if + scale = one + if( ( two*smlnum )*abs( btmp( 2 ) )>abs( u22 ) .or.( two*smlnum )*abs( btmp( 1 ) )>abs(& + u11 ) ) then + scale = half / max( abs( btmp( 1 ) ), abs( btmp( 2 ) ) ) + btmp( 1 ) = btmp( 1 )*scale + btmp( 2 ) = btmp( 2 )*scale + end if + x2( 2 ) = btmp( 2 ) / u22 + x2( 1 ) = btmp( 1 ) / u11 - ( u12 / u11 )*x2( 2 ) + if( xswap ) then + temp = x2( 2 ) + x2( 2 ) = x2( 1 ) + x2( 1 ) = temp + end if + x( 1, 1 ) = x2( 1 ) + if( n1==1 ) then + x( 1, 2 ) = x2( 2 ) + xnorm = abs( x( 1, 1 ) ) + abs( x( 1, 2 ) ) + else + x( 2, 1 ) = x2( 2 ) + xnorm = max( abs( x( 1, 1 ) ), abs( x( 2, 1 ) ) ) + end if + return + ! 2 by 2: + ! op[tl11 tl12]*[x11 x12] +isgn* [x11 x12]*op[tr11 tr12] = [b11 b12] + ! [tl21 tl22] [x21 x22] [x21 x22] [tr21 tr22] [b21 b22] + ! solve equivalent 4 by 4 system using complete pivoting. + ! set pivots less than smin to smin. + 50 continue + smin = max( abs( tr( 1, 1 ) ), abs( tr( 1, 2 ) ),abs( tr( 2, 1 ) ), abs( tr( 2, 2 ) ) ) + + smin = max( smin, abs( tl( 1, 1 ) ), abs( tl( 1, 2 ) ),abs( tl( 2, 1 ) ), abs( tl( 2, & + 2 ) ) ) + smin = max( eps*smin, smlnum ) + btmp( 1 ) = zero + call stdlib_dcopy( 16, btmp, 0, t16, 1 ) + t16( 1, 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + t16( 2, 2 ) = tl( 2, 2 ) + sgn*tr( 1, 1 ) + t16( 3, 3 ) = tl( 1, 1 ) + sgn*tr( 2, 2 ) + t16( 4, 4 ) = tl( 2, 2 ) + sgn*tr( 2, 2 ) + if( ltranl ) then + t16( 1, 2 ) = tl( 2, 1 ) + t16( 2, 1 ) = tl( 1, 2 ) + t16( 3, 4 ) = tl( 2, 1 ) + t16( 4, 3 ) = tl( 1, 2 ) + else + t16( 1, 2 ) = tl( 1, 2 ) + t16( 2, 1 ) = tl( 2, 1 ) + t16( 3, 4 ) = tl( 1, 2 ) + t16( 4, 3 ) = tl( 2, 1 ) + end if + if( ltranr ) then + t16( 1, 3 ) = sgn*tr( 1, 2 ) + t16( 2, 4 ) = sgn*tr( 1, 2 ) + t16( 3, 1 ) = sgn*tr( 2, 1 ) + t16( 4, 2 ) = sgn*tr( 2, 1 ) + else + t16( 1, 3 ) = sgn*tr( 2, 1 ) + t16( 2, 4 ) = sgn*tr( 2, 1 ) + t16( 3, 1 ) = sgn*tr( 1, 2 ) + t16( 4, 2 ) = sgn*tr( 1, 2 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 2, 1 ) + btmp( 3 ) = b( 1, 2 ) + btmp( 4 ) = b( 2, 2 ) + ! perform elimination + loop_100: do i = 1, 3 + xmax = zero + do ip = i, 4 + do jp = i, 4 + if( abs( t16( ip, jp ) )>=xmax ) then + xmax = abs( t16( ip, jp ) ) + ipsv = ip + jpsv = jp + end if + end do + end do + if( ipsv/=i ) then + call stdlib_dswap( 4, t16( ipsv, 1 ), 4, t16( i, 1 ), 4 ) + temp = btmp( i ) + btmp( i ) = btmp( ipsv ) + btmp( ipsv ) = temp + end if + if( jpsv/=i )call stdlib_dswap( 4, t16( 1, jpsv ), 1, t16( 1, i ), 1 ) + jpiv( i ) = jpsv + if( abs( t16( i, i ) )abs( t16( 1, 1 ) ) .or.( eight*smlnum )*abs( & + btmp( 2 ) )>abs( t16( 2, 2 ) ) .or.( eight*smlnum )*abs( btmp( 3 ) )>abs( t16( 3, 3 ) )& + .or.( eight*smlnum )*abs( btmp( 4 ) )>abs( t16( 4, 4 ) ) ) then + scale = ( one / eight ) / max( abs( btmp( 1 ) ),abs( btmp( 2 ) ), abs( btmp( 3 ) ), & + abs( btmp( 4 ) ) ) + btmp( 1 ) = btmp( 1 )*scale + btmp( 2 ) = btmp( 2 )*scale + btmp( 3 ) = btmp( 3 )*scale + btmp( 4 ) = btmp( 4 )*scale + end if + do i = 1, 4 + k = 5 - i + temp = one / t16( k, k ) + tmp( k ) = btmp( k )*temp + do j = k + 1, 4 + tmp( k ) = tmp( k ) - ( temp*t16( k, j ) )*tmp( j ) + end do + end do + do i = 1, 3 + if( jpiv( 4-i )/=4-i ) then + temp = tmp( 4-i ) + tmp( 4-i ) = tmp( jpiv( 4-i ) ) + tmp( jpiv( 4-i ) ) = temp + end if + end do + x( 1, 1 ) = tmp( 1 ) + x( 2, 1 ) = tmp( 2 ) + x( 1, 2 ) = tmp( 3 ) + x( 2, 2 ) = tmp( 4 ) + xnorm = max( abs( tmp( 1 ) )+abs( tmp( 3 ) ),abs( tmp( 2 ) )+abs( tmp( 4 ) ) ) + return + end subroutine stdlib_dlasy2 + + !> DLASYF: computes a partial factorization of a real symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_dlasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(dp) :: absakk, alpha, colmax, d11, d21, d22, r1, rowmax, t + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_idamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column kw-1 of w and update it + call stdlib_dcopy( imax, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + call stdlib_dcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + if( k1 ) then + jmax = stdlib_idamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, abs( w( jmax, kw-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( w( imax, kw-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_dcopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_dcopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + if( kp>1 )call stdlib_dcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k2 ) then + ! compose the columns of the inverse of 2-by-2 pivot + ! block d in the following way to reduce the number + ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by + ! this inverse + ! d**(-1) = ( d11 d21 )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = + ! ( (-d21 ) ( d11 ) ) + ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * + ! * ( ( d22/d21 ) ( -1 ) ) = + ! ( ( -1 ) ( d11/d21 ) ) + ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) ( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = 1/d21 * t * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + ! = d21 * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / d21 + d22 = w( k-1, kw-1 ) / d21 + t = one / ( d11*d22-one ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = d21*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_dgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k, -one,a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw, one,a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_dswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + ! copy column k of a to column k of w and update it + call stdlib_dcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ), lda,w( k, 1 ), ldw, & + one, w( k, k ), 1 ) + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column k+1 of w and update it + call stdlib_dcopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_dcopy( n-imax+1, a( imax, imax ), 1, w( imax, k+1 ),1 ) + call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( imax, & + 1 ), ldw, one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_idamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( w( imax, k+1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_dcopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_dcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + if( kp1 )call stdlib_dswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_dswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + call stdlib_dcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_dswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_dlasyf + + !> DLASYF_RK: computes a partial factorization of a real symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> DLASYF_RK is an auxiliary routine called by DSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_dlasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*), w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, dtemp, r1, rowmax, t, & + sfmin + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = zero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_idamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_dcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = zero + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_idamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = abs( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(abs( w( imax, kw-1 ) )1 ) then + if( abs( a( k, k ) )>=sfmin ) then + r1 = one / a( k, k ) + call stdlib_dscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the superdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = one / ( d11*d22-one ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = zero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = zero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_dgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -one, a( & + 1, k+1 ), lda, w( j, kw+1 ),ldw, one, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = zero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_dcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( k, & + 1 ), ldw, one, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one,a( k, 1 ), & + lda, w( imax, 1 ), ldw,one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_idamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( w( imax, k+1 ) )=sfmin ) then + r1 = one / a( k, k ) + call stdlib_dscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the subdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k DLASYF_ROOK: computes a partial factorization of a real symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> DLASYF_ROOK is an auxiliary routine called by DSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_dlasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, & + ii + real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, dtemp, r1, rowmax, t, & + sfmin + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_idamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_dcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_idamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = abs( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(abs( w( imax, kw-1 ) )1 ) then + if( abs( a( k, k ) )>=sfmin ) then + r1 = one / a( k, k ) + call stdlib_dscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = one / ( d11*d22-one ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_dgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -one, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,one, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n + j = k + 1 + 60 continue + kstep = 1 + jp1 = 1 + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_dswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = j - 1 + if( jp1/=jj .and. kstep==2 )call stdlib_dswap( n-j+1, a( jp1, j ), lda, a( jj, j & + ), lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_dcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( k, & + 1 ), ldw, one, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_dgemv( 'NO TRANSPOSE', n-k+1, k-1, -one,a( k, 1 ), & + lda, w( imax, 1 ), ldw,one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_idamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( w( imax, k+1 ) )=sfmin ) then + r1 = one / a( k, k ) + call stdlib_dscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k=1 )call stdlib_dswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = j + 1 + if( jp1/=jj .and. kstep==2 )call stdlib_dswap( j, a( jp1, 1 ), lda, a( jj, 1 ), & + lda ) + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_dlasyf_rook + + !> DLAT2S: converts a DOUBLE PRECISION triangular matrix, SA, to a SINGLE + !> PRECISION triangular matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> DLAS2S checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_dlat2s( uplo, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, n + ! Array Arguments + real(sp), intent(out) :: sa(ldsa,*) + real(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: rmax + logical(lk) :: upper + ! Executable Statements + rmax = stdlib_slamch( 'O' ) + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + do j = 1, n + do i = 1, j + if( ( a( i, j )<-rmax ) .or. ( a( i, j )>rmax ) )then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = j, n + if( ( a( i, j )<-rmax ) .or. ( a( i, j )>rmax ) )then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + end if + 50 continue + return + end subroutine stdlib_dlat2s + + !> DLATBS: solves one of the triangular systems + !> A *x = s*b or A**T*x = s*b + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_dlatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind + real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( kd<0 ) then + info = -6 + else if( ldab0 ) then + cnorm( j ) = stdlib_dasum( jlen, ab( 2, j ), 1 ) + else + cnorm( j ) = zero + end if + end do + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum. + imax = stdlib_idamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum ) then + tscal = one + else + tscal = one / ( smlnum*tmax ) + call stdlib_dscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_dtbsv can be used. + j = stdlib_idamax( n, x, 1 ) + xmax = abs( x( j ) ) + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + maind = kd + 1 + else + jfirst = 1 + jlast = n + jinc = 1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 50 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! m(j) = g(j-1) / abs(a(j,j)) + tjj = abs( ab( maind, j ) ) + xbnd = min( xbnd, min( one, tjj )*grow ) + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + maind = kd + 1 + else + jfirst = n + jlast = 1 + jinc = -1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( ab( maind, j ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_dtbsv( uplo, trans, diag, n, kd, ab, ldab, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_dscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 100 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 100 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_dscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - + ! x(j)* a(max(1,j-kd):j-1,j) + jlen = min( kd, j-1 ) + call stdlib_daxpy( jlen, -x( j )*tscal,ab( kd+1-jlen, j ), 1, x( j-jlen & + ), 1 ) + i = stdlib_idamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + else if( j0 )call stdlib_daxpy( jlen, -x( j )*tscal, ab( 2, j ), 1,x( j+1 ),& + 1 ) + i = j + stdlib_idamax( n-j, x( j+1 ), 1 ) + xmax = abs( x( i ) ) + end if + end do loop_110 + else + ! solve a**t * x = b + loop_160: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( rec0 )sumj = stdlib_ddot( jlen, ab( 2, j ), 1, x( j+1 ), 1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + sumj = sumj + ( ab( kd+i-jlen, j )*uscal )*x( j-jlen-1+i ) + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + sumj = sumj + ( ab( i+1, j )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==tscal ) then + ! compute x(j) := ( x(j) - sumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - sumj + xj = abs( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 150 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 150 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + end do loop_160 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_dlatbs + + !> DLATPS: solves one of the triangular systems + !> A *x = s*b or A**T*x = s*b + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, x and b are n-element vectors, and s is a scaling + !> factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_dlatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen + real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLATPS', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine machine dependent parameters to control overflow. + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + bignum = one / smlnum + scale = one + if( stdlib_lsame( normin, 'N' ) ) then + ! compute the 1-norm of each column, not including the diagonal. + if( upper ) then + ! a is upper triangular. + ip = 1 + do j = 1, n + cnorm( j ) = stdlib_dasum( j-1, ap( ip ), 1 ) + ip = ip + j + end do + else + ! a is lower triangular. + ip = 1 + do j = 1, n - 1 + cnorm( j ) = stdlib_dasum( n-j, ap( ip+1 ), 1 ) + ip = ip + n - j + 1 + end do + cnorm( n ) = zero + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum. + imax = stdlib_idamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum ) then + tscal = one + else + tscal = one / ( smlnum*tmax ) + call stdlib_dscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_dtpsv can be used. + j = stdlib_idamax( n, x, 1 ) + xmax = abs( x( j ) ) + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + else + jfirst = 1 + jlast = n + jinc = 1 + end if + if( tscal/=one ) then + grow = zero + go to 50 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = n + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! m(j) = g(j-1) / abs(a(j,j)) + tjj = abs( ap( ip ) ) + xbnd = min( xbnd, min( one, tjj )*grow ) + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + ip = ip + jinc*jlen + jlen = jlen - 1 + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( ap( ip ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_dtpsv( uplo, trans, diag, n, ap, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_dscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + ip = jfirst*( jfirst+1 ) / 2 + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + if( tscal==one )go to 100 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 100 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_dscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_daxpy( j-1, -x( j )*tscal, ap( ip-j+1 ), 1, x,1 ) + i = stdlib_idamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + ip = ip - j + else + if( jj + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 150 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_160 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_dlatps + + !> DLATRS: solves one of the triangular systems + !> A *x = s*b or A**T *x = s*b + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, x and b are + !> n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_dlatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast + real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( a( j, j ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_dtrsv( uplo, trans, diag, n, a, lda, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_dscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + loop_110: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 100 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 100 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_dscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_daxpy( j-1, -x( j )*tscal, a( 1, j ), 1, x,1 ) + i = stdlib_idamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + else + if( jj + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 150 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + end do loop_160 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_dlatrs + + !> DLAUU2: computes the product U * U**T or L**T * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the unblocked form of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_dlauu2( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DLAUUM: computes the product U * U**T or L**T * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dlauum( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ib, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_dlauu2( uplo, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute the product u * u**t. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',i-1, ib, one, a( & + i, i ), lda, a( 1, i ),lda ) + call stdlib_dlauu2( 'UPPER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', i-1, ib,n-i-ib+1, one, a( & + 1, i+ib ), lda,a( i, i+ib ), lda, one, a( 1, i ), lda ) + call stdlib_dsyrk( 'UPPER', 'NO TRANSPOSE', ib, n-i-ib+1,one, a( i, i+ib ),& + lda, one, a( i, i ),lda ) + end if + end do + else + ! compute the product l**t * l. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_dtrmm( 'LEFT', 'LOWER', 'TRANSPOSE', 'NON-UNIT', ib,i-1, one, a( & + i, i ), lda, a( i, 1 ), lda ) + call stdlib_dlauu2( 'LOWER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', ib, i-1,n-i-ib+1, one, a( & + i+ib, i ), lda,a( i+ib, 1 ), lda, one, a( i, 1 ), lda ) + call stdlib_dsyrk( 'LOWER', 'TRANSPOSE', ib, n-i-ib+1, one,a( i+ib, i ), & + lda, one, a( i, i ), lda ) + end if + end do + end if + end if + return + end subroutine stdlib_dlauum + + !> DORBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + + pure subroutine stdlib_dorbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: alphasq = 0.01_dp + real(dp), parameter :: realone = 1.0_dp + real(dp), parameter :: realzero = 0.0_dp + + + ! Local Scalars + integer(ilp) :: i + real(dp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORBDB6', -info ) + return + end if + ! first, project x onto the orthogonal complement of q's column + ! space + scl1 = realzero + ssq1 = realone + call stdlib_dlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_dlassq( m2, x2, incx2, scl2, ssq2 ) + normsq1 = scl1**2*ssq1 + scl2**2*ssq2 + if( m1 == 0 ) then + do i = 1, n + work(i) = zero + end do + else + call stdlib_dgemv( 'C', m1, n, one, q1, ldq1, x1, incx1, zero, work,1 ) + end if + call stdlib_dgemv( 'C', m2, n, one, q2, ldq2, x2, incx2, one, work, 1 ) + call stdlib_dgemv( 'N', m1, n, negone, q1, ldq1, work, 1, one, x1,incx1 ) + call stdlib_dgemv( 'N', m2, n, negone, q2, ldq2, work, 1, one, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_dlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_dlassq( m2, x2, incx2, scl2, ssq2 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if projection is sufficiently large in norm, then stop. + ! if projection is zero, then stop. + ! otherwise, project again. + if( normsq2 >= alphasq*normsq1 ) then + return + end if + if( normsq2 == zero ) then + return + end if + normsq1 = normsq2 + do i = 1, n + work(i) = zero + end do + if( m1 == 0 ) then + do i = 1, n + work(i) = zero + end do + else + call stdlib_dgemv( 'C', m1, n, one, q1, ldq1, x1, incx1, zero, work,1 ) + end if + call stdlib_dgemv( 'C', m2, n, one, q2, ldq2, x2, incx2, one, work, 1 ) + call stdlib_dgemv( 'N', m1, n, negone, q1, ldq1, work, 1, one, x1,incx1 ) + call stdlib_dgemv( 'N', m2, n, negone, q2, ldq2, work, 1, one, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_dlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_dlassq( m1, x1, incx1, scl1, ssq1 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if second projection is sufficiently large in norm, then do + ! nothing more. alternatively, if it shrunk significantly, then + ! truncate it to zero. + if( normsq2 < alphasq*normsq1 ) then + do i = 1, m1 + x1(i) = zero + end do + do i = 1, m2 + x2(i) = zero + end do + end if + return + end subroutine stdlib_dorbdb6 + + !> DORG2L: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. + + pure subroutine stdlib_dorg2l( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda DORG2R: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. + + pure subroutine stdlib_dorg2r( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda DORGL2: generates an m by n real matrix Q with orthonormal rows, + !> which is defined as the first m rows of a product of k elementary + !> reflectors of order n + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. + + pure subroutine stdlib_dorgl2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldak .and. j<=m )a( j, j ) = one + end do + end if + do i = k, 1, -1 + ! apply h(i) to a(i:m,i:n) from the right + if( i DORGLQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. + + pure subroutine stdlib_dorglq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DORGLQ', ' ', m, n, k, -1 ) + lwkopt = max( 1, m )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=m ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_dlarft( 'FORWARD', 'ROWWISE', n-i+1, ib, a( i, i ),lda, tau( i ), & + work, ldwork ) + ! apply h**t to a(i+ib:m,i:n) from the right + call stdlib_dlarfb( 'RIGHT', 'TRANSPOSE', 'FORWARD', 'ROWWISE',m-i-ib+1, n-i+& + 1, ib, a( i, i ), lda, work,ldwork, a( i+ib, i ), lda, work( ib+1 ),ldwork ) + + end if + ! apply h**t to columns i:n of current block + call stdlib_dorgl2( ib, n-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set columns 1:i-1 of current block to zero + do j = 1, i - 1 + do l = i, i + ib - 1 + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_dorglq + + !> DORGQL: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. + + pure subroutine stdlib_dorgql( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + if( n-k+i>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_dlarfb( 'LEFT', 'NO TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-& + 1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + ! apply h to rows 1:m-k+i+ib-1 of current block + call stdlib_dorg2l( m-k+i+ib-1, ib, ib, a( 1, n-k+i ), lda,tau( i ), work, iinfo & + ) + ! set rows m-k+i+ib:m of current block to zero + do j = n - k + i, n - k + i + ib - 1 + do l = m - k + i + ib, m + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_dorgql + + !> DORGQR: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. + + pure subroutine stdlib_dorgqr( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DORGQR', ' ', m, n, k, -1 ) + lwkopt = max( 1, n )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=n ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i & + ), work, ldwork ) + ! apply h to a(i:m,i+ib:n) from the left + call stdlib_dlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-& + i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), & + ldwork ) + end if + ! apply h to rows i:m of current block + call stdlib_dorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set rows 1:i-1 of current block to zero + do j = i, i + ib - 1 + do l = 1, i - 1 + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_dorgqr + + !> DORGR2: generates an m by n real matrix Q with orthonormal rows, + !> which is defined as the last m rows of a product of k elementary + !> reflectors of order n + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. + + pure subroutine stdlib_dorgr2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldan-m .and. j<=n-k )a( m-n+j, j ) = one + end do + end if + do i = 1, k + ii = m - k + i + ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right + a( ii, n-m+ii ) = one + call stdlib_dlarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1 ), lda, tau( i ),a, lda, work ) + + call stdlib_dscal( n-m+ii-1, -tau( i ), a( ii, 1 ), lda ) + a( ii, n-m+ii ) = one - tau( i ) + ! set a(m-k+i,n-k+i+1:n) to zero + do l = n - m + ii + 1, n + a( ii, l ) = zero + end do + end do + return + end subroutine stdlib_dorgr2 + + !> DORGRQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. + + pure subroutine stdlib_dorgrq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + ii = m - k + i + if( ii>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_dlarfb( 'RIGHT', 'TRANSPOSE', 'BACKWARD', 'ROWWISE',ii-1, n-k+i+& + ib-1, ib, a( ii, 1 ), lda, work,ldwork, a, lda, work( ib+1 ), ldwork ) + end if + ! apply h**t to columns 1:n-k+i+ib-1 of current block + call stdlib_dorgr2( ib, n-k+i+ib-1, ib, a( ii, 1 ), lda, tau( i ),work, iinfo ) + + ! set columns n-k+i+ib:n of current block to zero + do l = n - k + i + ib, n + do j = ii, ii + ib - 1 + a( j, l ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_dorgrq + + !> DORGTSQR_ROW: generates an M-by-N real matrix Q_out with + !> orthonormal columns from the output of DLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by DLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of DLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine DLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which DLATSQR generates the output blocks. + + pure subroutine stdlib_dorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, & + num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1 + ! Local Arrays + real(dp) :: dummy(1,1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m=m, then the loop is never executed. + if ( mb=m, then we have only one row block of a of size m + ! and we work on the entire matrix a. + mb1 = min( mb, m ) + ! apply column blocks of h in the top row block from right to left. + ! kb is the column index of the current block reflector in + ! the matrices t and v. + do kb = kb_last, 1, -nblocal + ! determine the size of the current column block knb in + ! the matrices t and v. + knb = min( nblocal, n - kb + 1 ) + if( mb1-kb-knb+1==0 ) then + ! in stdlib_slarfb_gett parameters, when m=0, then the matrix b + ! does not exist, hence we need to pass a dummy array + ! reference dummy(1,1) to b with lddummy=1. + call stdlib_dlarfb_gett( 'N', 0, n-kb+1, knb,t( 1, kb ), ldt, a( kb, kb ), lda,& + dummy( 1, 1 ), 1, work, knb ) + else + call stdlib_dlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1, kb ), ldt, a( kb, & + kb ), lda,a( kb+knb, kb), lda, work, knb ) + end if + end do + work( 1 ) = real( lworkopt,KIND=dp) + return + end subroutine stdlib_dorgtsqr_row + + + pure subroutine stdlib_dorm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: m, n, n1, n2, ldq, ldc, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: q(ldq,*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q; + ! nw is the minimum dimension of work. + if( left ) then + nq = m + else + nq = n + end if + nw = nq + if( n1==0 .or. n2==0 ) nw = 1 + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( n1<0 .or. n1+n2/=nq ) then + info = -5 + else if( n2<0 ) then + info = -6 + else if( ldq DORM2L: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T * C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_dorm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda DORM2R: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda DORML2: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_dorml2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda DORMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_dormlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_dorml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_dlarft( 'FORWARD', 'ROWWISE', nq-i+1, ib, a( i, i ),lda, tau( i ), & + work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_dlarfb( side, transt, 'FORWARD', 'ROWWISE', mi, ni, ib,a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormlq + + !> DORMQL: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_dormql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_dorm2l( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'COLUMNWISE', nq-k+i+ib-1, ib,a( 1, i ), lda, & + tau( i ), work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**t is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**t + call stdlib_dlarfb( side, trans, 'BACKWARD', 'COLUMNWISE', mi, ni,ib, a( 1, i ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormql + + !> DORMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_dlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),& + work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_dlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormqr + + !> DORMR2: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_dormr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda DORMR3: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'C', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_dormr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*), tau(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda DORMRQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_dormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_dormr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1 ), lda, tau( & + i ), work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**t is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**t + call stdlib_dlarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1 ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormrq + + !> DORMRZ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_dormrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, & + nbmin, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_dormr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, iinfo ) + + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + ja = m - l + 1 + else + mi = m + ic = 1 + ja = n - l + 1 + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarzt( 'BACKWARD', 'ROWWISE', l, ib, a( i, ja ), lda,tau( i ), work(& + iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_dlarzb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, l, a( i, ja )& + , lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormrz + + !> DPBEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_dpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab DPBSTF: computes a split Cholesky factorization of a real + !> symmetric positive definite band matrix A. + !> This routine is designed to be used in conjunction with DSBGST. + !> The factorization has the form A = S**T*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + + pure subroutine stdlib_dpbstf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, km, m + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_dscal( km, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_dsyr( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + end if + end do + else + ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m). + do j = n, m + 1, -1 + ! compute s(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 50 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( j-1, kd ) + ! compute elements j-km:j-1 of the j-th row and update the + ! trailing submatrix within the band. + call stdlib_dscal( km, one / ajj, ab( km+1, j-km ), kld ) + call stdlib_dsyr( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1, j-km ), kld ) + + end do + ! factorize the updated submatrix a(1:m,1:m) as u**t*u. + do j = 1, m + ! compute s(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 50 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( kd, m-j ) + ! compute elements j+1:j+km of the j-th column and update the + ! trailing submatrix within the band. + if( km>0 ) then + call stdlib_dscal( km, one / ajj, ab( 2, j ), 1 ) + call stdlib_dsyr( 'LOWER', km, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 50 continue + info = j + return + end subroutine stdlib_dpbstf + + !> DPBTF2: computes the Cholesky factorization of a real symmetric + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**T * U , if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix, U**T is the transpose of U, and + !> L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_dpbtf2( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, kn + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_dscal( kn, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_dsyr( 'UPPER', kn, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + end if + end do + else + ! compute the cholesky factorization a = l*l**t. + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 30 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + ! compute elements j+1:j+kn of column j and update the + ! trailing submatrix within the band. + kn = min( kd, n-j ) + if( kn>0 ) then + call stdlib_dscal( kn, one / ajj, ab( 2, j ), 1 ) + call stdlib_dsyr( 'LOWER', kn, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 30 continue + info = j + return + end subroutine stdlib_dpbtf2 + + !> DPBTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite band matrix A using the Cholesky factorization + !> A = U**T*U or A = L*L**T computed by DPBTRF. + + pure subroutine stdlib_dpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab DPOEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_dpoequ( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( lda DPOEQUB: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from DPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_dpoequb( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: smin, base, tmp + ! Intrinsic Functions + intrinsic :: max,min,sqrt,log,int + ! Executable Statements + ! test the input parameters. + ! positive definite only performs 1 pass of equilibration. + info = 0 + if( n<0 ) then + info = -1 + else if( lda DPOTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite matrix A using the Cholesky factorization + !> A = U**T*U or A = L*L**T computed by DPOTRF. + + pure subroutine stdlib_dpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DPPEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_dppequ( uplo, n, ap, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, jj + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DPPEQU', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + scond = one + amax = zero + return + end if + ! initialize smin and amax. + s( 1 ) = ap( 1 ) + smin = s( 1 ) + amax = s( 1 ) + if( upper ) then + ! uplo = 'u': upper triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + i + s( i ) = ap( jj ) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + else + ! uplo = 'l': lower triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + n - i + 2 + s( i ) = ap( jj ) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + end if + if( smin<=zero ) then + ! find the first non-positive diagonal element and return. + do i = 1, n + if( s( i )<=zero ) then + info = i + return + end if + end do + else + ! set the scale factors to the reciprocals + ! of the diagonal elements. + do i = 1, n + s( i ) = one / sqrt( s( i ) ) + end do + ! compute scond = min(s(i)) / max(s(i)) + scond = sqrt( smin ) / sqrt( amax ) + end if + return + end subroutine stdlib_dppequ + + !> DPPTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_dpptrf( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DPPTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( upper ) then + ! compute the cholesky factorization a = u**t*u. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + ! compute elements 1:j-1 of column j. + if( j>1 )call stdlib_dtpsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', j-1, ap,ap( jc ), & + 1 ) + ! compute u(j,j) and test for non-positive-definiteness. + ajj = ap( jj ) - stdlib_ddot( j-1, ap( jc ), 1, ap( jc ), 1 ) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ap( jj ) = sqrt( ajj ) + end do + else + ! compute the cholesky factorization a = l*l**t. + jj = 1 + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = ap( jj ) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ap( jj ) = ajj + ! compute elements j+1:n of column j and update the trailing + ! submatrix. + if( j DPPTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**T*U or A = L*L**T computed by DPPTRF. + + pure subroutine stdlib_dpptrs( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb DPTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a real symmetric positive definite tridiagonal matrix + !> using the factorization A = L*D*L**T or A = U**T*D*U computed by + !> DPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_dptcon( n, d, e, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(in) :: d(*), e(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ix + real(dp) :: ainvnm + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input arguments. + info = 0 + if( n<0 ) then + info = -1 + else if( anorm DPTTRF: computes the L*D*L**T factorization of a real symmetric + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**T*D*U. + + pure subroutine stdlib_dpttrf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i4 + real(dp) :: ei + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DPTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! compute the l*d*l**t (or u**t*d*u) factorization of a. + i4 = mod( n-1, 4 ) + do i = 1, i4 + if( d( i )<=zero ) then + info = i + go to 30 + end if + ei = e( i ) + e( i ) = ei / d( i ) + d( i+1 ) = d( i+1 ) - e( i )*ei + end do + loop_20: do i = i4 + 1, n - 4, 4 + ! drop out of the loop if d(i) <= 0: the matrix is not positive + ! definite. + if( d( i )<=zero ) then + info = i + go to 30 + end if + ! solve for e(i) and d(i+1). + ei = e( i ) + e( i ) = ei / d( i ) + d( i+1 ) = d( i+1 ) - e( i )*ei + if( d( i+1 )<=zero ) then + info = i + 1 + go to 30 + end if + ! solve for e(i+1) and d(i+2). + ei = e( i+1 ) + e( i+1 ) = ei / d( i+1 ) + d( i+2 ) = d( i+2 ) - e( i+1 )*ei + if( d( i+2 )<=zero ) then + info = i + 2 + go to 30 + end if + ! solve for e(i+2) and d(i+3). + ei = e( i+2 ) + e( i+2 ) = ei / d( i+2 ) + d( i+3 ) = d( i+3 ) - e( i+2 )*ei + if( d( i+3 )<=zero ) then + info = i + 3 + go to 30 + end if + ! solve for e(i+3) and d(i+4). + ei = e( i+3 ) + e( i+3 ) = ei / d( i+3 ) + d( i+4 ) = d( i+4 ) - e( i+3 )*ei + end do loop_20 + ! check d(n) for positive definiteness. + if( d( n )<=zero )info = n + 30 continue + return + end subroutine stdlib_dpttrf + + !> DPTTS2: solves a tridiagonal system of the form + !> A * X = B + !> using the L*D*L**T factorization of A computed by DPTTRF. D is a + !> diagonal matrix specified in the vector D, L is a unit bidiagonal + !> matrix whose subdiagonal is specified in the vector E, and X and B + !> are N by NRHS matrices. + + pure subroutine stdlib_dptts2( n, nrhs, d, e, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*), e(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + if( n==1 )call stdlib_dscal( nrhs, 1._dp / d( 1 ), b, ldb ) + return + end if + ! solve a * x = b using the factorization a = l*d*l**t, + ! overwriting each right hand side vector with its solution. + do j = 1, nrhs + ! solve l * x = b. + do i = 2, n + b( i, j ) = b( i, j ) - b( i-1, j )*e( i-1 ) + end do + ! solve d * l**t * x = b. + b( n, j ) = b( n, j ) / d( n ) + do i = n - 1, 1, -1 + b( i, j ) = b( i, j ) / d( i ) - b( i+1, j )*e( i ) + end do + end do + return + end subroutine stdlib_dptts2 + + !> DRSCL: multiplies an n-element real vector x by the real scalar 1/a. + !> This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + + pure subroutine stdlib_drscl( n, sa, sx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(in) :: sa + ! Array Arguments + real(dp), intent(inout) :: sx(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + real(dp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 )return + ! get machine parameters + smlnum = stdlib_dlamch( 'S' ) + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! initialize the denominator to sa and the numerator to 1. + cden = sa + cnum = one + 10 continue + cden1 = cden*smlnum + cnum1 = cnum / bignum + if( abs( cden1 )>abs( cnum ) .and. cnum/=zero ) then + ! pre-multiply x by smlnum if cden is large compared to cnum. + mul = smlnum + done = .false. + cden = cden1 + else if( abs( cnum1 )>abs( cden ) ) then + ! pre-multiply x by bignum if cden is small compared to cnum. + mul = bignum + done = .false. + cnum = cnum1 + else + ! multiply x by cnum / cden and return. + mul = cnum / cden + done = .true. + end if + ! scale the vector x by mul + call stdlib_dscal( n, mul, sx, incx ) + if( .not.done )go to 10 + return + end subroutine stdlib_drscl + + !> DSBGST: reduces a real symmetric-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**T*S by DPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**T*A*X, where + !> X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the + !> bandwidth of A. + + pure subroutine stdlib_dsbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldx, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(in) :: bb(ldbb,*) + real(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: update, upper, wantx + integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, & + nrt, nx + real(dp) :: bii, ra, ra1, t + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + wantx = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + ka1 = ka + 1 + kb1 = kb + 1 + info = 0 + if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldabn-1 )go to 480 + end if + if( upper ) then + ! transform a, working with the upper triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( kb1, i ) + do j = i, i1 + ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii + end do + do j = max( 1, i-ka ), i + ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*ab( k-i+ka1, i ) -bb(& + k-i+kb1, i )*ab( j-i+ka1, i ) +ab( ka1, i )*bb( j-i+kb1, i )*bb( k-i+kb1, & + i ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( k-i+kb1, i )*ab( j-i+ka1, i ) + + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j ) + + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_dscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_dger( n-m, kbt, -one, x( m+1, i ), 1,bb( kb1-kbt, i ), & + 1, x( m+1, i-kbt ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+ka1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_130: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i,i-k+ka+1) + call stdlib_dlartg( ab( k+1, i-k+ka ), ra1,work( n+i-k+ka-m ), work( i-k+& + ka-m ),ra ) + ! create nonzero element a(i-k,i-k+ka+1) outside the + ! band and store it in work(i-k) + t = -bb( kb1-k, i )*ra1 + work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( 1, i-k+ka ) + + ab( 1, i-k+ka ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( 1, i-k+ka ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( 1, j+1 ) + ab( 1, j+1 ) = work( n+j-m )*ab( 1, j+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_dlargv( nrt, ab( 1, j2t ), inca, work( j2t-m ), ka1,work( & + n+j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(& + n+j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + work( n+j2-m ),work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca,work( n+j2-m ), work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_drot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j-m ), & + work( j-m ) ) + end do + end if + end do loop_130 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kb1-kbt, i )*ra1 + end if + end if + loop_170: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), & + inca, work( n+j2-ka ),work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + work( n+j ) = work( n+j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( 1, j+1 ) + ab( 1, j+1 ) = work( n+j )*ab( 1, j+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_dlargv( nr, ab( 1, j2 ), inca, work( j2 ), ka1,work( n+j2 ), ka1 ) + + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(& + n+j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + work( n+j2 ),work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, work( n+j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_drot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j ), work( & + j ) ) + end do + end if + end do loop_210 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca,work( n+j2-m ), work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, i - kb + 2*ka + 1, -1 + work( n+j-m ) = work( n+j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( 1, i ) + do j = i, i1 + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do j = max( 1, i-ka ), i + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*ab( i-k+1, k ) -bb( i-k+1, & + k )*ab( i-j+1, j ) +ab( 1, i )*bb( i-j+1, j )*bb( i-k+1, k ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-k+1, k )*ab( i-j+1, j ) + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_dscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_dger( n-m, kbt, -one, x( m+1, i ), 1,bb( kbt+1, i-kbt )& + , ldbb-1,x( m+1, i-kbt ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_360: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i-k+ka+1,i) + call stdlib_dlartg( ab( ka1-k, i ), ra1, work( n+i-k+ka-m ),work( i-k+ka-m & + ), ra ) + ! create nonzero element a(i-k+ka+1,i-k) outside the + ! band and store it in work(i-k) + t = -bb( k+1, i-k )*ra1 + work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( ka1, i-k ) + ab( ka1, i-k ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( ka1, i-k ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = work( n+j-m )*ab( ka1, j-ka+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_dlargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, & + work( n+j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( & + n+j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, work( n+& + j2-m ), work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_drot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j-m ), & + work( j-m ) ) + end do + end if + end do loop_360 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1 + end if + end if + loop_400: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-& + ka+1 ), inca,work( n+j2-ka ), work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + work( n+j ) = work( n+j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = work( n+j )*ab( ka1, j-ka+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_dlargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,work( n+j2 ), & + ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( & + n+j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, work( n+& + j2 ), work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_drot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j ), work( & + j ) ) + end do + end if + end do loop_440 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, i - kb + 2*ka + 1, -1 + work( n+j-m ) = work( n+j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + end if + go to 10 + 480 continue + ! **************************** phase 2 ***************************** + ! the logical structure of this phase is: + ! update = .true. + ! do i = 1, m + ! use s(i) to update a and create a new bulge + ! apply rotations to push all bulges ka positions upward + ! end do + ! update = .false. + ! do i = m - ka - 1, 2, -1 + ! apply rotations to push all bulges ka positions upward + ! end do + ! to avoid duplicating code, the two loops are merged. + update = .true. + i = 0 + 490 continue + if( update ) then + i = i + 1 + kbt = min( kb, m-i ) + i0 = i + 1 + i1 = max( 1, i-ka ) + i2 = i + kbt - ka1 + if( i>m ) then + update = .false. + i = i - 1 + i0 = m + 1 + if( ka==0 )return + go to 490 + end if + else + i = i - ka + if( i<2 )return + end if + if( i0 )call stdlib_dger( nx, kbt, -one, x( 1, i ), 1, bb( kb, i+1 ),ldbb-& + 1, x( 1, i+1 ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+ka1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_610: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_dlargv( nrt, ab( 1, j1+ka ), inca, work( j1 ), ka1,work( & + n+j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + work( n+j1 ),work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + work( n+j1 ),work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+j1t ),work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_drot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+j ), work( j ) ) + + end do + end if + end do loop_610 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1 + end if + end if + loop_650: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),& + inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + work( n+m-kb+j ) = work( n+m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j-1,j+ka) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( 1, j+ka-1 ) + ab( 1, j+ka-1 ) = work( n+m-kb+j )*ab( 1, j+ka-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_650 + loop_690: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_dlargv( nr, ab( 1, j1+ka ), inca, work( m-kb+j1 ),ka1, work( n+m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca,& + work( n+m-kb+j1 ), work( m-kb+j1 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + work( n+m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_drot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_690 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+j1t ),work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, min( i+kb, m ) - 2*ka - 1 + work( n+j ) = work( n+j+ka ) + work( j ) = work( j+ka ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( 1, i ) + do j = i1, i + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do j = i, min( n, i+ka ) + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do k = i + 1, i + kbt + do j = k, i + kbt + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*ab( k-i+1, i ) -bb( k-i+1, & + i )*ab( j-i+1, i ) +ab( 1, i )*bb( j-i+1, i )*bb( k-i+1, i ) + end do + do j = i + kbt + 1, min( n, i+ka ) + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( k-i+1, i )*ab( j-i+1, i ) + end do + end do + do j = i1, i + do k = i + 1, min( j+ka, i+kbt ) + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_dscal( nx, one / bii, x( 1, i ), 1 ) + if( kbt>0 )call stdlib_dger( nx, kbt, -one, x( 1, i ), 1, bb( 2, i ), 1,x( 1, & + i+1 ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_840: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_dlargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,work( n+& + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+& + j1 ), work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, work( & + n+j1 ),work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_drot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+j ), work( j ) ) + + end do + end if + end do loop_840 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1 + end if + end if + loop_880: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, & + j1t+l-1 ), inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + work( n+m-kb+j ) = work( n+m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j+ka,j-1) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 ) + ab( ka1, j-1 ) = work( n+m-kb+j )*ab( ka1, j-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_880 + loop_920: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_dlargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, work( n+m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_dlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+& + m-kb+j1 ), work( m-kb+j1 ),ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_dlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, work( & + n+m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_drot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_920 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, min( i+kb, m ) - 2*ka - 1 + work( n+j ) = work( n+j+ka ) + work( j ) = work( j+ka ) + end do + end if + end if + go to 490 + end subroutine stdlib_dsbgst + + !> DSBTRD: reduces a real symmetric band matrix A to symmetric + !> tridiagonal form T by an orthogonal similarity transformation: + !> Q**T * A * Q = T. + + pure subroutine stdlib_dsbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldq, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*), q(ldq,*) + real(dp), intent(out) :: d(*), e(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: initq, upper, wantq + integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, & + jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + initq = stdlib_lsame( vect, 'V' ) + wantq = initq .or. stdlib_lsame( vect, 'U' ) + upper = stdlib_lsame( uplo, 'U' ) + kd1 = kd + 1 + kdm1 = kd - 1 + incx = ldab - 1 + iqend = 1 + info = 0 + if( .not.wantq .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldab1 ) then + ! reduce to tridiagonal form, working with upper triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + loop_90: do i = 1, n - 2 + ! reduce i-th row of matrix to tridiagonal form + loop_80: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_dlargv( nr, ab( 1, j1-1 ), inca, work( j1 ),kd1, d( j1 ), & + kd1 ) + ! apply rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_dlartv or stdlib_drot is used + if( nr>=2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_dlartv( nr, ab( l+1, j1-1 ), inca,ab( l, j1 ), inca, & + d( j1 ),work( j1 ), kd1 ) + end do + else + jend = j1 + ( nr-1 )*kd1 + do jinc = j1, jend, kd1 + call stdlib_drot( kdm1, ab( 2, jinc-1 ), 1,ab( 1, jinc ), 1, d( & + jinc ),work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+k-1) + ! within the band + call stdlib_dlartg( ab( kd-k+3, i+k-2 ),ab( kd-k+2, i+k-1 ), d( i+k-& + 1 ),work( i+k-1 ), temp ) + ab( kd-k+3, i+k-2 ) = temp + ! apply rotation from the right + call stdlib_drot( k-3, ab( kd-k+4, i+k-2 ), 1,ab( kd-k+3, i+k-1 ), 1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_dlar2v( nr, ab( kd1, j1-1 ), ab( kd1, j1 ),ab( kd, & + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the left + if( nr>0 ) then + if( 2*kd-1n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_dlartv( nrt, ab( kd-l, j1+l ), inca,ab( kd-& + l+1, j1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do jin = j1, j1end, kd1 + call stdlib_drot( kd-1, ab( kd-1, jin+1 ), incx,ab( kd, jin+1 )& + , incx,d( jin ), work( jin ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_drot( lend, ab( kd-1, last+1 ), incx,ab( kd, & + last+1 ), incx, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_drot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_drot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j-1,j+kd) outside the band + ! and store it in work + work( j+kd ) = work( j )*ab( 1, j+kd ) + ab( 1, j+kd ) = d( j )*ab( 1, j+kd ) + end do + end do loop_80 + end do loop_90 + end if + if( kd>0 ) then + ! copy off-diagonal elements to e + do i = 1, n - 1 + e( i ) = ab( kd, i+1 ) + end do + else + ! set e to zero if original matrix was diagonal + do i = 1, n - 1 + e( i ) = zero + end do + end if + ! copy diagonal elements to d + do i = 1, n + d( i ) = ab( kd1, i ) + end do + else + if( kd>1 ) then + ! reduce to tridiagonal form, working with lower triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + loop_210: do i = 1, n - 2 + ! reduce i-th column of matrix to tridiagonal form + loop_200: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_dlargv( nr, ab( kd1, j1-kd1 ), inca,work( j1 ), kd1, d( j1 )& + , kd1 ) + ! apply plane rotations from one side + ! dependent on the the number of diagonals either + ! stdlib_dlartv or stdlib_drot is used + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_dlartv( nr, ab( kd1-l, j1-kd1+l ), inca,ab( kd1-l+1, & + j1-kd1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + jend = j1 + kd1*( nr-1 ) + do jinc = j1, jend, kd1 + call stdlib_drot( kdm1, ab( kd, jinc-kd ), incx,ab( kd1, jinc-kd )& + , incx,d( jinc ), work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i+k-1,i) + ! within the band + call stdlib_dlartg( ab( k-1, i ), ab( k, i ),d( i+k-1 ), work( i+k-1 & + ), temp ) + ab( k-1, i ) = temp + ! apply rotation from the left + call stdlib_drot( k-3, ab( k-2, i+1 ), ldab-1,ab( k-1, i+1 ), ldab-1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_dlar2v( nr, ab( 1, j1-1 ), ab( 1, j1 ),ab( 2, j1-1 ),& + inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_dlartv or stdlib_drot is used + if( nr>0 ) then + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + if( j2+l>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_dlartv( nrt, ab( l+2, j1-1 ), inca,ab( l+1,& + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do j1inc = j1, j1end, kd1 + call stdlib_drot( kdm1, ab( 3, j1inc-1 ), 1,ab( 2, j1inc ), 1, & + d( j1inc ),work( j1inc ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_drot( lend, ab( 3, last-1 ), 1,ab( 2, last ),& + 1, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_drot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_drot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j+kd,j-1) outside the + ! band and store it in work + work( j+kd ) = work( j )*ab( kd1, j ) + ab( kd1, j ) = d( j )*ab( kd1, j ) + end do + end do loop_200 + end do loop_210 + end if + if( kd>0 ) then + ! copy off-diagonal elements to e + do i = 1, n - 1 + e( i ) = ab( 2, i ) + end do + else + ! set e to zero if original matrix was diagonal + do i = 1, n - 1 + e( i ) = zero + end do + end if + ! copy diagonal elements to d + do i = 1, n + d( i ) = ab( 1, i ) + end do + end if + return + end subroutine stdlib_dsbtrd + + !> Level 3 BLAS like routine for C in RFP Format. + !> DSFRK: performs one of the symmetric rank--k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n symmetric + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + + pure subroutine stdlib_dsfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, n + character, intent(in) :: trans, transr, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: c(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, normaltransr, nisodd, notrans + integer(ilp) :: info, nrowa, j, nk, n1, n2 + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( notrans ) then + nrowa = n + else + nrowa = k + end if + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( lda DSPGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by DPPTRF. + + pure subroutine stdlib_dspgst( itype, uplo, n, ap, bp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, n + ! Array Arguments + real(dp), intent(inout) :: ap(*) + real(dp), intent(in) :: bp(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk + real(dp) :: ajj, akk, bjj, bkk, ct + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DSPGST', -info ) + return + end if + if( itype==1 ) then + if( upper ) then + ! compute inv(u**t)*a*inv(u) + ! j1 and jj are the indices of a(1,j) and a(j,j) + jj = 0 + do j = 1, n + j1 = jj + 1 + jj = jj + j + ! compute the j-th column of the upper triangle of a + bjj = bp( jj ) + call stdlib_dtpsv( uplo, 'TRANSPOSE', 'NONUNIT', j, bp,ap( j1 ), 1 ) + call stdlib_dspmv( uplo, j-1, -one, ap, bp( j1 ), 1, one,ap( j1 ), 1 ) + call stdlib_dscal( j-1, one / bjj, ap( j1 ), 1 ) + ap( jj ) = ( ap( jj )-stdlib_ddot( j-1, ap( j1 ), 1, bp( j1 ),1 ) ) / & + bjj + end do + else + ! compute inv(l)*a*inv(l**t) + ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1) + kk = 1 + do k = 1, n + k1k1 = kk + n - k + 1 + ! update the lower triangle of a(k:n,k:n) + akk = ap( kk ) + bkk = bp( kk ) + akk = akk / bkk**2 + ap( kk ) = akk + if( k DSPTRF: computes the factorization of a real symmetric matrix A stored + !> in packed format using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_dsptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, r1, rowmax, t, wk, wkm1, & + wkp1 + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DSPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**t using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( ap( kc+k-1 ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_idamax( k-1, ap( kc ), 1 ) + colmax = abs( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( abs( ap( kx ) )>rowmax ) then + rowmax = abs( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_idamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, abs( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( ap( kpc+imax-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_dswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = ap( knc+j-1 ) + ap( knc+j-1 ) = ap( kx ) + ap( kx ) = t + end do + t = ap( knc+kk-1 ) + ap( knc+kk-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = t + if( kstep==2 ) then + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = one / ap( kc+k-1 ) + call stdlib_dspr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_dscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = ap( k-1+( k-1 )*k / 2 ) + d22 = ap( k-1+( k-2 )*( k-1 ) / 2 ) / d12 + d11 = ap( k+( k-1 )*k / 2 ) / d12 + t = one / ( d11*d22-one ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-ap( j+( k-1 )*k / 2 ) ) + + wk = d12*( d22*ap( j+( k-1 )*k / 2 )-ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *wk -ap( i+( k-2 )*( k-1 ) / 2 )*wkm1 + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( ap( kc ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( abs( ap( kx ) )>rowmax ) then + rowmax = abs( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( ap( kpc ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp DSPTRI: computes the inverse of a real symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSPTRF. + + pure subroutine stdlib_dsptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + real(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DSPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==zero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==zero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = one / ap( kc+k-1 ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_dspmv( uplo, k-1, -one, ap, work, 1, zero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_ddot( k-1, work, 1, ap( kc ), 1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( ap( kcnext+k-1 ) ) + ak = ap( kc+k-1 ) / t + akp1 = ap( kcnext+k ) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-one ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_dspmv( uplo, k-1, -one, ap, work, 1, zero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_ddot( k-1, work, 1, ap( kc ), 1 ) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_ddot( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_dcopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_dspmv( uplo, k-1, -one, ap, work, 1, zero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -stdlib_ddot( k-1, work, 1, ap( kcnext ), 1 ) + + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_dswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = ap( kc+j-1 ) + ap( kc+j-1 ) = ap( kx ) + ap( kx ) = temp + end do + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = one / ap( kc ) + ! compute column k of the inverse. + if( k DSPTRS: solves a system of linear equations A*X = B with a real + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by DSPTRF. + + pure subroutine stdlib_dsptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + real(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_dger( k-1, nrhs, -one, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_dscal( nrhs, one / ap( kc+k-1 ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_dswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_dger( k-2, nrhs, -one, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_dger( k-2, nrhs, -one, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, 1 & + ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, ap( kc ),1, one, b( k, & + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, ap( kc ),1, one, b( k, & + 1 ), ldb ) + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb,ap( kc+k ), 1, one, b( k+& + 1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k DSTEBZ: computes the eigenvalues of a symmetric tridiagonal + !> matrix T. The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_dstebz( range, order, n, vl, vu, il, iu, abstol, d, e,m, nsplit, w, & + iblock, isplit, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: order, range + integer(ilp), intent(in) :: il, iu, n + integer(ilp), intent(out) :: info, m, nsplit + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), isplit(*), iwork(*) + real(dp), intent(in) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: fudge = 2.1_dp + real(dp), parameter :: relfac = 2.0_dp + + + ! Local Scalars + logical(lk) :: ncnvrg, toofew + integer(ilp) :: ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iorder, & + iout, irange, itmax, itmp1, iw, iwoff, j, jb, jdisc, je, nb, nwl, nwu + real(dp) :: atoli, bnorm, gl, gu, pivmin, rtoli, safemn, tmp1, tmp2, tnorm, ulp, wkill,& + wl, wlu, wu, wul + ! Local Arrays + integer(ilp) :: idumma(1) + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + info = 0 + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = 1 + else if( stdlib_lsame( range, 'V' ) ) then + irange = 2 + else if( stdlib_lsame( range, 'I' ) ) then + irange = 3 + else + irange = 0 + end if + ! decode order + if( stdlib_lsame( order, 'B' ) ) then + iorder = 2 + else if( stdlib_lsame( order, 'E' ) ) then + iorder = 1 + else + iorder = 0 + end if + ! check for errors + if( irange<=0 ) then + info = -1 + else if( iorder<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( irange==2 ) then + if( vl>=vu )info = -5 + else if( irange==3 .and. ( il<1 .or. il>max( 1, n ) ) )then + info = -6 + else if( irange==3 .and. ( iun ) )then + info = -7 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DSTEBZ', -info ) + return + end if + ! initialize error flags + info = 0 + ncnvrg = .false. + toofew = .false. + ! quick return if possible + m = 0 + if( n==0 )return + ! simplifications: + if( irange==3 .and. il==1 .and. iu==n )irange = 1 + ! get machine constants + ! nb is the minimum vector length for vector bisection, or 0 + ! if only scalar is to be done. + safemn = stdlib_dlamch( 'S' ) + ulp = stdlib_dlamch( 'P' ) + rtoli = ulp*relfac + nb = stdlib_ilaenv( 1, 'DSTEBZ', ' ', n, -1, -1, -1 ) + if( nb<=1 )nb = 0 + ! special case when n=1 + if( n==1 ) then + nsplit = 1 + isplit( 1 ) = 1 + if( irange==2 .and. ( vl>=d( 1 ) .or. vutmp1 ) then + isplit( nsplit ) = j - 1 + nsplit = nsplit + 1 + work( j-1 ) = zero + else + work( j-1 ) = tmp1 + pivmin = max( pivmin, tmp1 ) + end if + end do + isplit( nsplit ) = n + pivmin = pivmin*safemn + ! compute interval and atoli + if( irange==3 ) then + ! range='i': compute the interval containing eigenvalues + ! il through iu. + ! compute gershgorin interval for entire (split) matrix + ! and use it as the initial interval + gu = d( 1 ) + gl = d( 1 ) + tmp1 = zero + do j = 1, n - 1 + tmp2 = sqrt( work( j ) ) + gu = max( gu, d( j )+tmp1+tmp2 ) + gl = min( gl, d( j )-tmp1-tmp2 ) + tmp1 = tmp2 + end do + gu = max( gu, d( n )+tmp1 ) + gl = min( gl, d( n )-tmp1 ) + tnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*tnorm*ulp*n - fudge*two*pivmin + gu = gu + fudge*tnorm*ulp*n + fudge*pivmin + ! compute iteration parameters + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + if( abstol<=zero ) then + atoli = ulp*tnorm + else + atoli = abstol + end if + work( n+1 ) = gl + work( n+2 ) = gl + work( n+3 ) = gu + work( n+4 ) = gu + work( n+5 ) = gl + work( n+6 ) = gu + iwork( 1 ) = -1 + iwork( 2 ) = -1 + iwork( 3 ) = n + 1 + iwork( 4 ) = n + 1 + iwork( 5 ) = il - 1 + iwork( 6 ) = iu + call stdlib_dlaebz( 3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin, d, e,work, iwork( & + 5 ), work( n+1 ), work( n+5 ), iout,iwork, w, iblock, iinfo ) + if( iwork( 6 )==iu ) then + wl = work( n+1 ) + wlu = work( n+3 ) + nwl = iwork( 1 ) + wu = work( n+4 ) + wul = work( n+2 ) + nwu = iwork( 4 ) + else + wl = work( n+2 ) + wlu = work( n+4 ) + nwl = iwork( 2 ) + wu = work( n+3 ) + wul = work( n+1 ) + nwu = iwork( 3 ) + end if + if( nwl<0 .or. nwl>=n .or. nwu<1 .or. nwu>n ) then + info = 4 + return + end if + else + ! range='a' or 'v' -- set atoli + tnorm = max( abs( d( 1 ) )+abs( e( 1 ) ),abs( d( n ) )+abs( e( n-1 ) ) ) + do j = 2, n - 1 + tnorm = max( tnorm, abs( d( j ) )+abs( e( j-1 ) )+abs( e( j ) ) ) + end do + if( abstol<=zero ) then + atoli = ulp*tnorm + else + atoli = abstol + end if + if( irange==2 ) then + wl = vl + wu = vu + else + wl = zero + wu = zero + end if + end if + ! find eigenvalues -- loop over blocks and recompute nwl and nwu. + ! nwl accumulates the number of eigenvalues .le. wl, + ! nwu accumulates the number of eigenvalues .le. wu + m = 0 + iend = 0 + info = 0 + nwl = 0 + nwu = 0 + loop_70: do jb = 1, nsplit + ioff = iend + ibegin = ioff + 1 + iend = isplit( jb ) + in = iend - ioff + if( in==1 ) then + ! special case -- in=1 + if( irange==1 .or. wl>=d( ibegin )-pivmin )nwl = nwl + 1 + if( irange==1 .or. wu>=d( ibegin )-pivmin )nwu = nwu + 1 + if( irange==1 .or. ( wl=d( ibegin )-pivmin ) ) & + then + m = m + 1 + w( m ) = d( ibegin ) + iblock( m ) = jb + end if + else + ! general case -- in > 1 + ! compute gershgorin interval + ! and use it as the initial interval + gu = d( ibegin ) + gl = d( ibegin ) + tmp1 = zero + do j = ibegin, iend - 1 + tmp2 = abs( e( j ) ) + gu = max( gu, d( j )+tmp1+tmp2 ) + gl = min( gl, d( j )-tmp1-tmp2 ) + tmp1 = tmp2 + end do + gu = max( gu, d( iend )+tmp1 ) + gl = min( gl, d( iend )-tmp1 ) + bnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*bnorm*ulp*in - fudge*pivmin + gu = gu + fudge*bnorm*ulp*in + fudge*pivmin + ! compute atoli for the current submatrix + if( abstol<=zero ) then + atoli = ulp*max( abs( gl ), abs( gu ) ) + else + atoli = abstol + end if + if( irange>1 ) then + if( gu=gu )cycle loop_70 + end if + ! set up initial interval + work( n+1 ) = gl + work( n+in+1 ) = gu + call stdlib_dlaebz( 1, 0, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e( & + ibegin ), work( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), im,iwork, w( m+1 & + ), iblock( m+1 ), iinfo ) + nwl = nwl + iwork( 1 ) + nwu = nwu + iwork( in+1 ) + iwoff = m - iwork( 1 ) + ! compute eigenvalues + itmax = int( ( log( gu-gl+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + & + 2 + call stdlib_dlaebz( 2, itmax, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e(& + ibegin ), work( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), iout,iwork, w( & + m+1 ), iblock( m+1 ), iinfo ) + ! copy eigenvalues into w and iblock + ! use -jb for block number for unconverged eigenvalues. + do j = 1, iout + tmp1 = half*( work( j+n )+work( j+in+n ) ) + ! flag non-convergence. + if( j>iout-iinfo ) then + ncnvrg = .true. + ib = -jb + else + ib = jb + end if + do je = iwork( j ) + 1 + iwoff,iwork( j+in ) + iwoff + w( je ) = tmp1 + iblock( je ) = ib + end do + end do + m = m + im + end if + end do loop_70 + ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu + ! if nwl+1 < il or nwu > iu, discard extra eigenvalues. + if( irange==3 ) then + im = 0 + idiscl = il - 1 - nwl + idiscu = nwu - iu + if( idiscl>0 .or. idiscu>0 ) then + do je = 1, m + if( w( je )<=wlu .and. idiscl>0 ) then + idiscl = idiscl - 1 + else if( w( je )>=wul .and. idiscu>0 ) then + idiscu = idiscu - 1 + else + im = im + 1 + w( im ) = w( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl>0 .or. idiscu>0 ) then + ! code to deal with effects of bad arithmetic: + ! some low eigenvalues to be discarded are not in (wl,wlu], + ! or high eigenvalues to be discarded are not in (wul,wu] + ! so just kill off the smallest idiscl/largest idiscu + ! eigenvalues, by simply finding the smallest/largest + ! eigenvalue(s). + ! (if n(w) is monotone non-decreasing, this should never + ! happen.) + if( idiscl>0 ) then + wkill = wu + do jdisc = 1, idiscl + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )0 ) then + wkill = wl + do jdisc = 1, idiscu + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )>wkill .or. iw==0 ) ) then + iw = je + wkill = w( je ) + end if + end do + iblock( iw ) = 0 + end do + end if + im = 0 + do je = 1, m + if( iblock( je )/=0 ) then + im = im + 1 + w( im ) = w( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl<0 .or. idiscu<0 ) then + toofew = .true. + end if + end if + ! if order='b', do nothing -- the eigenvalues are already sorted + ! by block. + ! if order='e', sort the eigenvalues from smallest to largest + if( iorder==1 .and. nsplit>1 ) then + do je = 1, m - 1 + ie = 0 + tmp1 = w( je ) + do j = je + 1, m + if( w( j ) DSYCONV: convert A given by TRF into L and D and vice-versa. + !> Get Non-diag elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_dsyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + real(dp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=zero + a(i-1,i)=zero + i=i-1 + else + e(i)=zero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=zero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_dsyconv + + !> If parameter WAY = 'C': + !> DSYCONVF: converts the factorization output format used in + !> DSYTRF provided on entry in parameter A into the factorization + !> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in DSYTRF into + !> the format used in DSYTRF_RK (or DSYTRF_BK). + !> If parameter WAY = 'R': + !> DSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in DSYTRF_RK + !> (or DSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in DSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in DSYTRF_RK + !> (or DSYTRF_BK) into the format used in DSYTRF. + + pure subroutine stdlib_dsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_dswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_dswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_dswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_dswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is one interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_dsyconvf + + !> If parameter WAY = 'C': + !> DSYCONVF_ROOK: converts the factorization output format used in + !> DSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for DSYTRF_ROOK and + !> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> DSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in DSYTRF_RK + !> (or DSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in DSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for DSYTRF_ROOK and + !> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. + + pure subroutine stdlib_dsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_dswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_dswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_dswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_dswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_dswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_dswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_dsyconvf_rook + + !> DSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_dsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(dp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'DSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_dp / s( j ) + end do + tol = one / sqrt( 2.0_dp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + s( i )*work( i ) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_dlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = abs( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( work( i ) - t*si ) + c0 = -(t*si)*si + 2*work( i )*si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + work( i ) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_dlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_dlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_dsyequb + + !> DSYGS2: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. + !> B must have been previously factorized as U**T *U or L*L**T by DPOTRF. + + pure subroutine stdlib_dsygs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(dp) :: akk, bkk, ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DSYGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by DPOTRF. + + pure subroutine stdlib_dsygst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_dsygs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**t)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_dtrsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, & + one, b( k, k ), ldb,a( k, k+kb ), lda ) + call stdlib_dsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_dsyr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), & + lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda ) + call stdlib_dsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_dtrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**t) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_dtrsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, & + one, b( k, k ), ldb,a( k+kb, k ), lda ) + call stdlib_dsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_dsyr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k & + ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda ) + call stdlib_dsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_dtrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**t + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_dtrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, & + b, ldb, a( 1, k ), lda ) + call stdlib_dsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_dsyr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_dsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_dtrmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( & + k, k ), ldb, a( 1, k ),lda ) + call stdlib_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**t*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_dtrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, & + b, ldb, a( k, 1 ), lda ) + call stdlib_dsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_dsyr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1 ), lda, b( k, & + 1 ), ldb, one, a,lda ) + call stdlib_dsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_dtrmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( & + k, k ), ldb, a( k, 1 ), lda ) + call stdlib_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_dsygst + + !> DSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_dsyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(dp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_dswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_dswap( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_dsyswapr + + !> DSYTF2_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_dsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_idamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = zero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_idamax( imax-1, a( 1, imax ), 1 ) + dtemp = abs( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_dswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_dswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_dswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_dswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_dsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_dscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_dsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = zero + a( k-1, k ) = zero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = zero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_dswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_dswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_dswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_dswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_dsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_dscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_dsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k DSYTF2_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_dsytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_idamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_idamax( imax-1, a( 1, imax ), 1 ) + dtemp = abs( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_dswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_dswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_dswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_dswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_dsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_dscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_dsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_dswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_dswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_dsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_dscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_dsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k DSYTRF_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_dsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_dlasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_dsytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_dlasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_dsytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_dswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dsytrf_rk + + !> DSYTRF_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_dlasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_dsytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_dlasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_dlasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_dsytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_dsytrf_rook + + !> DSYTRI: computes the inverse of a real symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> DSYTRF. + + pure subroutine stdlib_dsytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_ddot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_ddot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_ddot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_dcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_ddot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_dswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_dswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k DSYTRI_ROOK: computes the inverse of a real symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by DSYTRF_ROOK. + + pure subroutine stdlib_dsytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_ddot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_dcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_ddot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_ddot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_dcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_dsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_ddot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_dswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_dswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_dswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_dswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_dswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_dswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k DSYTRS: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF. + + pure subroutine stdlib_dsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_dger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_dscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_dswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_dger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_dger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb,a( 1, k+1 ), 1, one, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k DSYTRS2: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF and converted by DSYCONV. + + pure subroutine stdlib_dsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_dswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_dtrsm('L','U','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_dscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_dtrsm('L','U','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_dswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_dtrsm('L','L','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_dscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_dtrsm('L','L','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_dsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_dsytrs2 + + !> DSYTRS_3: solves a system of linear equations A * X = B with a real + !> symmetric matrix A using the factorization computed + !> by DSYTRF_RK or DSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_dsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*), e(*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_dtrsm( 'L', 'U', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_dscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_dtrsm( 'L', 'U', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_dtrsm( 'L', 'L', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_dscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_dtrsm('L', 'L', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_dsytrs_3 + + !> DSYTRS_AA: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by DSYTRF_AA. + + pure subroutine stdlib_dsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_dtrsm('L', 'U', 'T', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b( 2, 1 ), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_dlacpy( 'F', 1, n, a( 1, 1 ), lda+1, work( n ), 1) + if( n>1 ) then + call stdlib_dlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_dlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_dgtsv( n, nrhs, work( 1 ), work( n ), work( 2*n ), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_dtrsm( 'L', 'U', 'N', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b( 2, 1 ), & + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**t \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_dtrsm( 'L', 'L', 'N', 'U', n-1, nrhs, one, a( 2, 1 ),lda, b( 2, 1 ), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_dlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_dlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_dlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_dgtsv( n, nrhs, work( 1 ), work(n), work( 2*n ), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute (l**t \ b) -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_dtrsm( 'L', 'L', 'T', 'U', n-1, nrhs, one, a( 2, 1 ),lda, b( 2, 1 ), & + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_dsytrs_aa + + !> DSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a real symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF_ROOK. + + pure subroutine stdlib_dsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_dger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_dscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_dswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_dger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_dger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ),& + ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, & + one, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, one, b( & + k, 1 ), ldb ) + call stdlib_dgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k+1 ), 1, one, & + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_dswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_dswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k DTBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by DTBTRS or some other + !> means before entering this routine. DTBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_dtbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dtbsv( uplo, transt, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_dtbsv( uplo, trans, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_dtbrfs + + !> DTBTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_dtbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> DTFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_dtfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + real(dp), intent(in) :: alpha + ! Array Arguments + real(dp), intent(in) :: a(0:*) + real(dp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb DTFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_dtfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: ap(0:*) + real(dp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = arf( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_dtfttp + + !> DTFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_dtfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(dp), intent(out) :: a(0:lda-1,0:*) + real(dp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTPRFB: applies a real "triangular-pentagonal" block reflector H or its + !> transpose H**T to a real matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_dtprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(in) :: t(ldt,*), v(ldv,*) + real(dp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h**t c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - t (a + v**t b) or a = a - t**t (a + v**t b) + ! b = b - v t (a + v**t b) or b = b - v t**t (a + v**t b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_dgemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork ) + + call stdlib_dgemm( 'T', 'N', k-l, n, m, one, v( 1, kp ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_dgemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_dtrmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h**t where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - (a + b v) t or a = a - (a + b v) t**t + ! b = b - (a + b v) t v**t or b = b - (a + b v) t**t v**t + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_dtrmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_dgemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork ) + + call stdlib_dgemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1, kp ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_dgemm( 'N', 'T', m, l, k-l, -one, work( 1, kp ), ldwork,v( np, kp ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_dtrmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h**t c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - t (a + v**t b) or a = a - t**t (a + v**t b) + ! b = b - v t (a + v**t b) or b = b - v t**t (a + v**t b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_dgemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_dgemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'N', m-l, n, k, -one, v( mp, 1 ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_dgemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_dtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h**t where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - (a + b v) t or a = a - (a + b v) t**t + ! b = b - (a + b v) t v**t or b = b - (a + b v) t**t v**t + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_dgemm( 'N', 'N', m, l, n-l, one, b( 1, np ), ldb,v( np, kp ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_dgemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1 ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_dgemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_dtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**t c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - t (a + v b) or a = a - t**t (a + v b) + ! b = b - v**t t (a + v b) or b = b - v**t t**t (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, mp ), ldv,work, ldb ) + + call stdlib_dgemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork ) + + call stdlib_dgemm( 'N', 'N', k-l, n, m, one, v( kp, 1 ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_dgemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_dtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**t where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - (a + b v**t) t or a = a - (a + b v**t) t**t + ! b = b - (a + b v**t) t v or b = b - (a + b v**t) t**t v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_dtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + call stdlib_dgemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork ) + + call stdlib_dgemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1 ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_dgemm( 'N', 'N', m, l, k-l, -one, work( 1, kp ), ldwork,v( kp, np ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_dtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**t c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - t (a + v b) or a = a - t**t (a + v b) + ! b = b - v**t t (a + v b) or b = b - v**t t**t (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_dgemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_dgemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'T', 'N', m-l, n, k, -one, v( 1, mp ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_dgemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_dtrmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**t where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - (a + b v**t) t or a = a - (a + b v**t) t**t + ! b = b - (a + b v**t) t v or b = b - (a + b v**t) t**t v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_dgemm( 'N', 'T', m, l, n-l, one, b( 1, np ), ldb,v( kp, np ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_dgemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_dtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_dgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1, np ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_dgemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_dtrmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_dtprfb + + !> DTPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by DTPTRS or some other + !> means before entering this routine. DTPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_dtprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, kc, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dtpsv( uplo, transt, diag, n, ap, work( n+1 ), 1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_dtpsv( uplo, trans, diag, n, ap, work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_dtprfs + + !> DTPTRI: computes the inverse of a real upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_dtptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + real(dp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==zero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==zero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = one / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -one + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_dtpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_dscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = one / ap( jc ) + ajj = -ap( jc ) + else + ajj = -one + end if + if( j DTPTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_dtptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb DTPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_dtpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: ap(0:*) + real(dp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = ap( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! n is odd, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is odd, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! n is odd, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is odd, transr = 't', and uplo = 'u' + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! n is even, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is even, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! n is even, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is even, transr = 't', and uplo = 'u' + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_dtpttf + + !> DTPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_dtpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(dp), intent(out) :: a(lda,*) + real(dp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DTRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by DTRTRS or some other + !> means before entering this routine. DTRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_dtrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dtrsv( uplo, transt, diag, n, a, lda, work( n+1 ),1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_dtrsv( uplo, trans, diag, n, a, lda, work( n+1 ),1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_dtrrfs + + !> DTRTI2: computes the inverse of a real upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_dtrti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTRTRI: computes the inverse of a real upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_dtrtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_dtrti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_dtrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, one, a, lda,& + a( 1, j ), lda ) + call stdlib_dtrsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -one, a( j,& + j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_dtrti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_dtrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, one,& + a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_dtrsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + one, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_dtrti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_dtrtri + + !> DTRTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_dtrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda DTRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_dtrttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(dp), intent(in) :: a(0:lda-1,0:*) + real(dp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_dtrttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DZSUM1: takes the sum of the absolute values of a complex + !> vector and returns a double precision result. + !> Based on DZASUM from the Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure real(dp) function stdlib_dzsum1( n, cx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + real(dp) :: stemp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_dzsum1 = zero + stemp = zero + if( n<=0 )return + if( incx==1 )go to 20 + ! code for increment not equal to 1 + nincx = n*incx + do i = 1, nincx, incx + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_dzsum1 = stemp + return + ! code for increment equal to 1 + 20 continue + do i = 1, n + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_dzsum1 = stemp + return + end function stdlib_dzsum1 + +#:if WITH_QP + !> DLAG2Q: converts a SINGLE PRECISION matrix, SA, to a DOUBLE + !> PRECISION matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_dlag2q( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + real(dp), intent(in) :: sa(ldsa,*) + real(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_dlag2q +#:endif + !> DBBCSD: computes the CS decomposition of an orthogonal matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See DORCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The orthogonal matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_dbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q + ! Array Arguments + real(dp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), work(*) + real(dp), intent(inout) :: phi(*), theta(*) + real(dp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(dp), parameter :: hundred = 100.0_dp + real(dp), parameter :: meighth = -0.125_dp + real(dp), parameter :: piover2 = 1.57079632679489661923132169163975144210_dp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini + real(dp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lworkmin = 1 + work(1) = lworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lworkopt = iv2tsn + q - 1 + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_dlamch( 'EPSILON' ) + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_dp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_dlartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + else + call stdlib_dlartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + end if + temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) + b11d(imin) = temp + b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) + temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) + b21d(imin) = temp + b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_dlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& + r ) + else if( mu <= nu ) then + call stdlib_dlartgs( b11e( imin ), b11d( imin + 1 ), mu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + else + call stdlib_dlartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_dlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& + r ) + else if( nu < mu ) then + call stdlib_dlartgs( b21e( imin ), b21d( imin + 1 ), nu,work(iu2cs+imin-1), work(& + iu2sn+imin-1) ) + else + call stdlib_dlartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& + imin-1) ) + end if + work(iu2cs+imin-1) = -work(iu2cs+imin-1) + work(iu2sn+imin-1) = -work(iu2sn+imin-1) + temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = work(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) + end if + temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) + b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = work(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) + temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = work(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) + end if + temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) + b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = work(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_dlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_dlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_dlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_dlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + else + call stdlib_dlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + end if + work(iv1tcs+i-1) = -work(iv1tcs+i-1) + work(iv1tsn+i-1) = -work(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_dlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( nu < mu ) then + call stdlib_dlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + else + call stdlib_dlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + end if + temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) + b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = work(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) + temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) + b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = work(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) + temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) + b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = work(iv2tsn+i-1-1)*b12e(i) + b12e(i) = work(iv2tcs+i-1-1)*b12e(i) + temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) + b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = work(iv2tsn+i-1-1)*b22e(i) + b22e(i) = work(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_dlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_dlartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_dlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( mu <= nu ) then + call stdlib_dlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + else + call stdlib_dlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_dlartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( nu < mu ) then + call stdlib_dlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + else + call stdlib_dlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + end if + work(iu2cs+i-1) = -work(iu2cs+i-1) + work(iu2sn+i-1) = -work(iu2sn+i-1) + temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = work(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) + end if + temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = work(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) + end if + temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) + b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = work(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) + temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) + b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = work(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_dlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_dlartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + else + call stdlib_dlartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + end if + temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) + b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) + b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_dlasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_dlasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_dlasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_dlasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_dlasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_dlasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_dlasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_dlasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_dscal( q, negone, v1t(imax,1), ldv1t ) + else + call stdlib_dscal( q, negone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_dscal( p, negone, u1(1,imax), 1 ) + else + call stdlib_dscal( p, negone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_dscal( m-p, negone, u2(1,imax), 1 ) + else + call stdlib_dscal( m-p, negone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_dscal( m-q, negone, v2t(imax,1), ldv2t ) + else + call stdlib_dscal( m-q, negone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_dswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_dswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_dswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_dswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_dswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_dswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_dswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_dswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_dbbcsd + + !> DDISNA: computes the reciprocal condition numbers for the eigenvectors + !> of a real symmetric or complex Hermitian matrix or for the left or + !> right singular vectors of a general m-by-n matrix. The reciprocal + !> condition number is the 'gap' between the corresponding eigenvalue or + !> singular value and the nearest other one. + !> The bound on the error, measured by angle in radians, in the I-th + !> computed vector is given by + !> DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) + !> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed + !> to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of + !> the error bound. + !> DDISNA may also be used to compute error bounds for eigenvectors of + !> the generalized symmetric definite eigenproblem. + + pure subroutine stdlib_ddisna( job, m, n, d, sep, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: m, n + ! Array Arguments + real(dp), intent(in) :: d(*) + real(dp), intent(out) :: sep(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: decr, eigen, incr, left, right, sing + integer(ilp) :: i, k + real(dp) :: anorm, eps, newgap, oldgap, safmin, thresh + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + eigen = stdlib_lsame( job, 'E' ) + left = stdlib_lsame( job, 'L' ) + right = stdlib_lsame( job, 'R' ) + sing = left .or. right + if( eigen ) then + k = m + else if( sing ) then + k = min( m, n ) + end if + if( .not.eigen .and. .not.sing ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( k<0 ) then + info = -3 + else + incr = .true. + decr = .true. + do i = 1, k - 1 + if( incr )incr = incr .and. d( i )<=d( i+1 ) + if( decr )decr = decr .and. d( i )>=d( i+1 ) + end do + if( sing .and. k>0 ) then + if( incr )incr = incr .and. zero<=d( 1 ) + if( decr )decr = decr .and. d( k )>=zero + end if + if( .not.( incr .or. decr ) )info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DDISNA', -info ) + return + end if + ! quick return if possible + if( k==0 )return + ! compute reciprocal condition numbers + if( k==1 ) then + sep( 1 ) = stdlib_dlamch( 'O' ) + else + oldgap = abs( d( 2 )-d( 1 ) ) + sep( 1 ) = oldgap + do i = 2, k - 1 + newgap = abs( d( i+1 )-d( i ) ) + sep( i ) = min( oldgap, newgap ) + oldgap = newgap + end do + sep( k ) = oldgap + end if + if( sing ) then + if( ( left .and. m>n ) .or. ( right .and. m DGBBRD: reduces a real general m-by-n band matrix A to upper + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> The routine computes B, and optionally forms Q or P**T, or computes + !> Q**T*C for a given matrix C. + + pure subroutine stdlib_dgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*), c(ldc,*) + real(dp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& + mu, mu0, nr, nrt + real(dp) :: ra, rb, rc, rs + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the sines of the plane rotations are stored in work(1:max(m,n)) + ! and the cosines in work(max(m,n)+1:2*max(m,n)). + mn = max( m, n ) + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_dlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + work( mn+j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_dlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_dlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & + work( i+ml-1 ),ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = work( mn+j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_dlargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + work( mn+j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_dlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_dlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & + mn+i+mu-1 ), work( i+mu-1 ),ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_drot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,work( mn+i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**t + do j = j1, j2, kb1 + call stdlib_drot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, work( & + mn+j+kun ),work( j+kun ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, storing diagonal elements in d + ! and off-diagonal elements in e + do i = 1, min( m-1, n ) + call stdlib_dlartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + d( i ) = ra + if( i0 ) then + ! a has been reduced to upper bidiagonal form + if( m1 ) then + rb = -rs*ab( ku, i ) + e( i-1 ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_drot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, rs ) + + end do + else + ! copy off-diagonal elements to e and diagonal elements to d + do i = 1, minmn - 1 + e( i ) = ab( ku, i+1 ) + end do + do i = 1, minmn + d( i ) = ab( ku+1, i ) + end do + end if + else + ! a is diagonal. set elements of e to zero and copy diagonal + ! elements to d. + do i = 1, minmn - 1 + e( i ) = zero + end do + do i = 1, minmn + d( i ) = ab( 1, i ) + end do + end if + return + end subroutine stdlib_dgbbrd + + !> DGBCON: estimates the reciprocal of the condition number of a real + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by DGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_dgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(dp) :: ainvnm, scale, smlnum, t + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,min + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_dlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_daxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_dlatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, work( 2*n+1 ),info ) + else + ! multiply by inv(u**t). + call stdlib_dlatbs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, ldab, & + work, scale, work( 2*n+1 ),info ) + ! multiply by inv(l**t). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_ddot( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_idamax( n, work, 1 ) + if( scale DGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_dgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab DGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from DGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_dgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), abs( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_dgbequb + + !> DGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_dgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_dgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, info ) + + call stdlib_daxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + call stdlib_dgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_dgbrfs + + !> DGBTRF: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + real(dp) :: temp + ! Local Arrays + real(dp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_dgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! zero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = zero + end do + end do + ! zero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = zero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to zero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = zero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to zero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = zero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_idamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=zero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_dger( km, jm-jj, -one, ab( kv+2, jj ), 1,ab( kv, jj+& + 1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_dcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_dlaswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_dlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_dtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, one, ab(& + kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -one, ab( & + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+1, j+jb ), & + ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -one, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+kl+1-jb, j+jb ), & + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_dtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, one, ab(& + kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -one, ab( & + kv+1+jb, j ), ldab-1,work13, ldwork, one, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -one, & + work31, ldwork, work13,ldwork, one, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_dcopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_dgbtrf + + !> DGECON: estimates the reciprocal of the condition number of a general + !> real matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by DGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_dgecon( norm, n, a, lda, anorm, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, scale, sl, smlnum, su + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_dgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from DGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_dgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), abs( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_dgeequb + + !> DGEMLQT: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by DGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_dgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + real(dp), intent(in) :: v(ldv,*), t(ldt,*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv DGEMQRT: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by DGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_dgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + real(dp), intent(in) :: v(ldv,*), t(ldt,*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv DGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by DGETC2. + + pure subroutine stdlib_dgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, eps, smlnum, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! set constant to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_dlaswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_idamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = ( one / two ) / abs( rhs( i ) ) + call stdlib_dscal( n, temp, rhs( 1 ), 1 ) + scale = scale*temp + end if + do i = n, 1, -1 + temp = one / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_dlaswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_dgesc2 + + !> DGETC2: computes an LU factorization with complete pivoting of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is the Level 2 BLAS algorithm. + + pure subroutine stdlib_dgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(dp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_dswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_dswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) DGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_dgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_dscal( m-j, one / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j DGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_dgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: sfmin, temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_dscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_dgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_dlaswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_dtrsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_dgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_dlaswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_dgetrf2 + + !> DGETRI: computes the inverse of a matrix using the LU factorization + !> computed by DGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_dgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'DGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_dtrtri, then u is singular, + ! and the inverse is not computed. + call stdlib_dtrtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = zero + end do + ! compute current column of inv(a). + if( j DGETRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by DGETRF. + + pure subroutine stdlib_dgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DGGBAL: balances a pair of general real matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_dggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: lscale(*), rscale(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sclfac = 1.0e+1_dp + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(dp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + ! Intrinsic Functions + intrinsic :: abs,real,int,log10,max,min,sign + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax DGGHRD: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then DGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_dgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(dp) :: c, s, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi DGTTRS: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by DGTTRF. + + pure subroutine stdlib_dgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_dgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_dgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_dgttrs + + !> DISNAN: returns .TRUE. if its argument is NaN, and .FALSE. + !> otherwise. To be replaced by the Fortran 2003 intrinsic in the + !> future. + + pure logical(lk) function stdlib_disnan( din ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: din + ! ===================================================================== + ! Executable Statements + stdlib_disnan = stdlib_dlaisnan(din,din) + return + end function stdlib_disnan + + !> DLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_dla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_dla_gbamv + + !> DLA_GBRCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(dp) function stdlib_dla_gbrcond( trans, n, kl, ku, ab, ldab,afb, ldafb, ipiv, cmode, c,& + info, work, iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, ldab, ldafb, kl, ku, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: ab(ldab,*), afb(ldafb,*), c(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j, kd, ke + real(dp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_dla_gbrcond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab DLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_dla_geamv ( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n, trans + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' )) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_dla_geamv + + !> DLA_GERCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(dp) function stdlib_dla_gercond( trans, n, a, lda, af,ldaf, ipiv, cmode, c,info, work, & + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_dla_gercond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DLA_LIN_BERR: computes component-wise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the component-wise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_dla_lin_berr ( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(dp), intent(in) :: ayb(n,nrhs) + real(dp), intent(out) :: berr(nrhs) + real(dp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(dp) :: tmp,safe1 + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the sla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= zero) then + tmp = (safe1+abs(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_dp (and if computed by sla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_dla_lin_berr + + !> DLA_PORCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(dp) function stdlib_dla_porcond( uplo, n, a, lda, af, ldaf,cmode, c, info, work,iwork ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + real(dp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(dp), intent(out) :: work(*) + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, tmp + logical(lk) :: up + ! Array Arguments + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_dla_porcond = zero + info = 0 + if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLA_PORCOND', -info ) + return + end if + if( n==0 ) then + stdlib_dla_porcond = one + return + end if + up = .false. + if ( stdlib_lsame( uplo, 'U' ) ) up = .true. + ! compute the equilibration matrix r such that + ! inv(r)*a*c has unit 1-norm. + if ( up ) then + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( j ,i ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + else + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + endif + ! estimate the norm of inv(op(a)). + ainvnm = zero + kase = 0 + 10 continue + call stdlib_dlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==2 ) then + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + if (up) then + call stdlib_dpotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_dpotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by inv(c). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + else + ! multiply by inv(c**t). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + if ( up ) then + call stdlib_dpotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_dpotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + end if + go to 10 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm /= zero )stdlib_dla_porcond = ( one / ainvnm ) + return + end function stdlib_dla_porcond + + !> DLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_dla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + real(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) ) then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_dla_syamv + + !> DLA_SYRCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(dp) function stdlib_dla_syrcond( uplo, n, a, lda, af, ldaf,ipiv, cmode, c, info, work,& + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + character :: normin + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, smlnum, tmp + logical(lk) :: up + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_dla_syrcond = zero + info = 0 + if( n<0 ) then + info = -2 + else if( lda DLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(dp) function stdlib_dla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(in) :: a(lda,*), af(ldaf,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(dp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_dsytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k-1 ) = max( abs( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k+1 ) = max( abs( af(i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_dla_syrpvgrw = rpvgrw + end function stdlib_dla_syrpvgrw + + + pure subroutine stdlib_dladiv1( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(inout) :: a + real(dp), intent(in) :: b, c, d + real(dp), intent(out) :: p, q + ! ===================================================================== + + ! Local Scalars + real(dp) :: r, t + ! Executable Statements + r = d / c + t = one / (c + d * r) + p = stdlib_dladiv2(a, b, c, d, r, t) + a = -a + q = stdlib_dladiv2(b, a, c, d, r, t) + return + end subroutine stdlib_dladiv1 + + !> DLAED6: computes the positive or negative root (closest to the origin) + !> of + !> z(1) z(2) z(3) + !> f(x) = rho + --------- + ---------- + --------- + !> d(1)-x d(2)-x d(3)-x + !> It is assumed that + !> if ORGATI = .true. the root is between d(2) and d(3); + !> otherwise it is between d(1) and d(2) + !> This routine will be called by DLAED4 when necessary. In most cases, + !> the root sought is the smallest in magnitude, though it might not be + !> in some extremely rare situations. + + pure subroutine stdlib_dlaed6( kniter, orgati, rho, d, z, finit, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: orgati + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kniter + real(dp), intent(in) :: finit, rho + real(dp), intent(out) :: tau + ! Array Arguments + real(dp), intent(in) :: d(3), z(3) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + + + ! Local Arrays + real(dp) :: dscale(3), zscale(3) + ! Local Scalars + logical(lk) :: scale + integer(ilp) :: i, iter, niter + real(dp) :: a, b, base, c, ddf, df, eps, erretm, eta, f, fc, sclfac, sclinv, small1, & + small2, sminv1, sminv2, temp, temp1, temp2, temp3, temp4, lbd, ubd + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + info = 0 + if( orgati ) then + lbd = d(2) + ubd = d(3) + else + lbd = d(1) + ubd = d(2) + end if + if( finit < zero )then + lbd = zero + else + ubd = zero + end if + niter = 1 + tau = zero + if( kniter==2 ) then + if( orgati ) then + temp = ( d( 3 )-d( 2 ) ) / two + c = rho + z( 1 ) / ( ( d( 1 )-d( 2 ) )-temp ) + a = c*( d( 2 )+d( 3 ) ) + z( 2 ) + z( 3 ) + b = c*d( 2 )*d( 3 ) + z( 2 )*d( 3 ) + z( 3 )*d( 2 ) + else + temp = ( d( 1 )-d( 2 ) ) / two + c = rho + z( 3 ) / ( ( d( 3 )-d( 2 ) )-temp ) + a = c*( d( 1 )+d( 2 ) ) + z( 1 ) + z( 2 ) + b = c*d( 1 )*d( 2 ) + z( 1 )*d( 2 ) + z( 2 )*d( 1 ) + end if + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + tau = b / a + else if( a<=zero ) then + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( tau < lbd .or. tau > ubd )tau = ( lbd+ubd )/two + if( d(1)==tau .or. d(2)==tau .or. d(3)==tau ) then + tau = zero + else + temp = finit + tau*z(1)/( d(1)*( d( 1 )-tau ) ) +tau*z(2)/( d(2)*( d( 2 )-tau ) )& + +tau*z(3)/( d(3)*( d( 3 )-tau ) ) + if( temp <= zero )then + lbd = tau + else + ubd = tau + end if + if( abs( finit )<=abs( temp ) )tau = zero + end if + end if + ! get machine parameters for possible scaling to avoid overflow + ! modified by sven: parameters small1, sminv1, small2, + ! sminv2, eps are not saved anymore between one call to the + ! others but recomputed at each call + eps = stdlib_dlamch( 'EPSILON' ) + base = stdlib_dlamch( 'BASE' ) + small1 = base**( int( log( stdlib_dlamch( 'SAFMIN' ) ) / log( base ) /three,KIND=ilp) ) + + sminv1 = one / small1 + small2 = small1*small1 + sminv2 = sminv1*sminv1 + ! determine if scaling of inputs necessary to avoid overflow + ! when computing 1/temp**3 + if( orgati ) then + temp = min( abs( d( 2 )-tau ), abs( d( 3 )-tau ) ) + else + temp = min( abs( d( 1 )-tau ), abs( d( 2 )-tau ) ) + end if + scale = .false. + if( temp<=small1 ) then + scale = .true. + if( temp<=small2 ) then + ! scale up by power of radix nearest 1/safmin**(2/3) + sclfac = sminv2 + sclinv = small2 + else + ! scale up by power of radix nearest 1/safmin**(1/3) + sclfac = sminv1 + sclinv = small1 + end if + ! scaling up safe because d, z, tau scaled elsewhere to be o(1) + do i = 1, 3 + dscale( i ) = d( i )*sclfac + zscale( i ) = z( i )*sclfac + end do + tau = tau*sclfac + lbd = lbd*sclfac + ubd = ubd*sclfac + else + ! copy d and z to dscale and zscale + do i = 1, 3 + dscale( i ) = d( i ) + zscale( i ) = z( i ) + end do + end if + fc = zero + df = zero + ddf = zero + do i = 1, 3 + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + fc = fc + temp1 / dscale( i ) + df = df + temp2 + ddf = ddf + temp3 + end do + f = finit + tau*fc + if( abs( f )<=zero )go to 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + ! iteration begins -- use gragg-thornton-warner cubic convergent + ! scheme + ! it is not hard to see that + ! 1) iterations will go up monotonically + ! if finit < 0; + ! 2) iterations will go down monotonically + ! if finit > 0. + iter = niter + 1 + loop_50: do niter = iter, maxit + if( orgati ) then + temp1 = dscale( 2 ) - tau + temp2 = dscale( 3 ) - tau + else + temp1 = dscale( 1 ) - tau + temp2 = dscale( 2 ) - tau + end if + a = ( temp1+temp2 )*f - temp1*temp2*df + b = temp1*temp2*f + c = f - ( temp1+temp2 )*df + temp1*temp2*ddf + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( f*eta>=zero ) then + eta = -f / df + end if + tau = tau + eta + if( tau < lbd .or. tau > ubd )tau = ( lbd + ubd )/two + fc = zero + erretm = zero + df = zero + ddf = zero + do i = 1, 3 + if ( ( dscale( i )-tau )/=zero ) then + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + temp4 = temp1 / dscale( i ) + fc = fc + temp4 + erretm = erretm + abs( temp4 ) + df = df + temp2 + ddf = ddf + temp3 + else + go to 60 + end if + end do + f = finit + tau*fc + erretm = eight*( abs( finit )+abs( tau )*erretm ) +abs( tau )*df + if( ( abs( f )<=four*eps*erretm ) .or.( (ubd-lbd)<=four*eps*abs(tau) ) )go to & + 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + end do loop_50 + info = 1 + 60 continue + ! undo scaling + if( scale )tau = tau*sclinv + return + end subroutine stdlib_dlaed6 + + !> DLAGS2: computes 2-by-2 orthogonal matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**T *A*Q = U**T *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**T*B*Q = V**T *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**T *A*Q = U**T *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**T*B*Q = V**T*( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> The rows of the transformed A and B are parallel, where + !> U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) + !> ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) + !> Z**T denotes the transpose of Z. + + pure subroutine stdlib_dlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(dp), intent(in) :: a1, a2, a3, b1, b2, b3 + real(dp), intent(out) :: csq, csu, csv, snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(dp) :: a, aua11, aua12, aua21, aua22, avb11, avb12, avb21, avb22, b, c, csl, csr, & + d, r, s1, s2, snl, snr, ua11, ua11r, ua12, ua21, ua22, ua22r, vb11, vb11r, vb12, vb21, & + vb22, vb22r + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_dlasv2( a, b, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,2) element of |u|**t *|a| and |v|**t *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + snr*b3 + aua12 = abs( csl )*abs( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**t *a and v**t *b + if( ( abs( ua11r )+abs( ua12 ) )/=zero ) then + if( aua12 / ( abs( ua11r )+abs( ua12 ) )<=avb12 /( abs( vb11r )+abs( vb12 ) ) & + ) then + call stdlib_dlartg( -ua11r, ua12, csq, snq, r ) + else + call stdlib_dlartg( -vb11r, vb12, csq, snq, r ) + end if + else + call stdlib_dlartg( -vb11r, vb12, csq, snq, r ) + end if + csu = csl + snu = -snl + csv = csr + snv = -snr + else + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,2) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snl*a1 + ua22 = -snl*a2 + csl*a3 + vb21 = -snr*b1 + vb22 = -snr*b2 + csr*b3 + aua22 = abs( snl )*abs( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua21 )+abs( ua22 ) )/=zero ) then + if( aua22 / ( abs( ua21 )+abs( ua22 ) )<=avb22 /( abs( vb21 )+abs( vb22 ) ) ) & + then + call stdlib_dlartg( -ua21, ua22, csq, snq, r ) + else + call stdlib_dlartg( -vb21, vb22, csq, snq, r ) + end if + else + call stdlib_dlartg( -vb21, vb22, csq, snq, r ) + end if + csu = snl + snu = csl + csv = snr + snv = csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_dlasv2( a, c, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,1) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs( b2 ) + ! zero (2,1) elements of u**t *a and v**t *b. + if( ( abs( ua21 )+abs( ua22r ) )/=zero ) then + if( aua21 / ( abs( ua21 )+abs( ua22r ) )<=avb21 /( abs( vb21 )+abs( vb22r ) ) & + ) then + call stdlib_dlartg( ua22r, ua21, csq, snq, r ) + else + call stdlib_dlartg( vb22r, vb21, csq, snq, r ) + end if + else + call stdlib_dlartg( vb22r, vb21, csq, snq, r ) + end if + csu = csr + snu = -snr + csv = csl + snv = -snl + else + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,1) element of |u|**t *|a| and |v|**t *|b|. + ua11 = csr*a1 + snr*a2 + ua12 = snr*a3 + vb11 = csl*b1 + snl*b2 + vb12 = snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs( b2 ) + ! zero (1,1) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua11 )+abs( ua12 ) )/=zero ) then + if( aua11 / ( abs( ua11 )+abs( ua12 ) )<=avb11 /( abs( vb11 )+abs( vb12 ) ) ) & + then + call stdlib_dlartg( ua12, ua11, csq, snq, r ) + else + call stdlib_dlartg( vb12, vb11, csq, snq, r ) + end if + else + call stdlib_dlartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = csr + csv = snl + snv = csl + end if + end if + return + end subroutine stdlib_dlags2 + + !> DLAGTF: factorizes the matrix (T - lambda*I), where T is an n by n + !> tridiagonal matrix and lambda is a scalar, as + !> T - lambda*I = PLU, + !> where P is a permutation matrix, L is a unit lower tridiagonal matrix + !> with at most one non-zero sub-diagonal elements per column and U is + !> an upper triangular matrix with at most two non-zero super-diagonal + !> elements per column. + !> The factorization is obtained by Gaussian elimination with partial + !> pivoting and implicit row scaling. + !> The parameter LAMBDA is included in the routine so that DLAGTF may + !> be used, in conjunction with DLAGTS, to obtain eigenvectors of T by + !> inverse iteration. + + pure subroutine stdlib_dlagtf( n, a, lambda, b, c, tol, d, in, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: lambda, tol + ! Array Arguments + integer(ilp), intent(out) :: in(*) + real(dp), intent(inout) :: a(*), b(*), c(*) + real(dp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(dp) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLAGTF', -info ) + return + end if + if( n==0 )return + a( 1 ) = a( 1 ) - lambda + in( n ) = 0 + if( n==1 ) then + if( a( 1 )==zero )in( 1 ) = 1 + return + end if + eps = stdlib_dlamch( 'EPSILON' ) + tl = max( tol, eps ) + scale1 = abs( a( 1 ) ) + abs( b( 1 ) ) + loop_10: do k = 1, n - 1 + a( k+1 ) = a( k+1 ) - lambda + scale2 = abs( c( k ) ) + abs( a( k+1 ) ) + if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) ) + if( a( k )==zero ) then + piv1 = zero + else + piv1 = abs( a( k ) ) / scale1 + end if + if( c( k )==zero ) then + in( k ) = 0 + piv2 = zero + scale1 = scale2 + if( k<( n-1 ) )d( k ) = zero + else + piv2 = abs( c( k ) ) / scale2 + if( piv2<=piv1 ) then + in( k ) = 0 + scale1 = scale2 + c( k ) = c( k ) / a( k ) + a( k+1 ) = a( k+1 ) - c( k )*b( k ) + if( k<( n-1 ) )d( k ) = zero + else + in( k ) = 1 + mult = a( k ) / c( k ) + a( k ) = c( k ) + temp = a( k+1 ) + a( k+1 ) = b( k ) - mult*temp + if( k<( n-1 ) ) then + d( k ) = b( k+1 ) + b( k+1 ) = -mult*d( k ) + end if + b( k ) = temp + c( k ) = mult + end if + end if + if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0 ) )in( n ) = k + end do loop_10 + if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0 ) )in( n ) = n + return + end subroutine stdlib_dlagtf + + !> DLAGTS: may be used to solve one of the systems of equations + !> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, + !> where T is an n by n tridiagonal matrix, for x, following the + !> factorization of (T - lambda*I) as + !> (T - lambda*I) = P*L*U , + !> by routine DLAGTF. The choice of equation to be solved is + !> controlled by the argument JOB, and in each case there is an option + !> to perturb zero or very small diagonal elements of U, this option + !> being intended for use in applications such as inverse iteration. + + pure subroutine stdlib_dlagts( job, n, a, b, c, d, in, y, tol, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: job, n + real(dp), intent(inout) :: tol + ! Array Arguments + integer(ilp), intent(in) :: in(*) + real(dp), intent(in) :: a(*), b(*), c(*), d(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(dp) :: absak, ak, bignum, eps, pert, sfmin, temp + ! Intrinsic Functions + intrinsic :: abs,max,sign + ! Executable Statements + info = 0 + if( ( abs( job )>2 ) .or. ( job==0 ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAGTS', -info ) + return + end if + if( n==0 )return + eps = stdlib_dlamch( 'EPSILON' ) + sfmin = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / sfmin + if( job<0 ) then + if( tol<=zero ) then + tol = abs( a( 1 ) ) + if( n>1 )tol = max( tol, abs( a( 2 ) ), abs( b( 1 ) ) ) + do k = 3, n + tol = max( tol, abs( a( k ) ), abs( b( k-1 ) ),abs( d( k-2 ) ) ) + end do + tol = tol*eps + if( tol==zero )tol = eps + end if + end if + if( abs( job )==1 ) then + do k = 2, n + if( in( k-1 )==0 ) then + y( k ) = y( k ) - c( k-1 )*y( k-1 ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + if( job==1 ) then + loop_30: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_30 + else + loop_50: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 40 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 40 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 40 + end if + end if + y( k ) = temp / ak + end do loop_50 + end if + else + ! come to here if job = 2 or -2 + if( job==2 ) then + loop_60: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_60 + else + loop_80: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 70 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 70 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 70 + end if + end if + y( k ) = temp / ak + end do loop_80 + end if + do k = n, 2, -1 + if( in( k-1 )==0 ) then + y( k-1 ) = y( k-1 ) - c( k-1 )*y( k ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + end if + end subroutine stdlib_dlagts + + !> DLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then DLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**T gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**T and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ alpha ] + !> [ gamma ] + !> where alpha = x**T*w. + + pure subroutine stdlib_dlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(dp), intent(out) :: c, s, sestpr + real(dp), intent(in) :: gamma, sest + ! Array Arguments + real(dp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(dp) :: absalp, absest, absgam, alpha, b, cosine, eps, norma, s1, s2, sine, t, & + test, tmp, zeta1, zeta2 + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + eps = stdlib_dlamch( 'EPSILON' ) + alpha = stdlib_ddot( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + s = sqrt( one+tmp*tmp ) + sestpr = s2*s + c = ( gamma / s2 ) / s + s = sign( one, alpha ) / s + else + tmp = s2 / s1 + c = sqrt( one+tmp*tmp ) + sestpr = s1*c + s = ( alpha / s1 ) / c + c = sign( one, gamma ) / c + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = c / ( b+sqrt( b*b+c ) ) + else + t = sqrt( b*b+c ) - b + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -gamma + cosine = alpha + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + c = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / c ) + s = -( gamma / s2 ) / c + c = sign( one, alpha ) / c + else + tmp = s2 / s1 + s = sqrt( one+tmp*tmp ) + sestpr = absest / s + c = ( alpha / s1 ) / s + s = -sign( one, gamma ) / s + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + norma = max( one+zeta1*zeta1+abs( zeta1*zeta2 ),abs( zeta1*zeta2 )+zeta2*zeta2 ) + + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = c / ( b+sqrt( abs( b*b-c ) ) ) + sine = zeta1 / ( one-t ) + cosine = -zeta2 / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = -c / ( b+sqrt( b*b+c ) ) + else + t = b - sqrt( b*b+c ) + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_dlaic1 + + !> DLANEG: computes the Sturm count, the number of negative pivots + !> encountered while factoring tridiagonal T - sigma I = L D L^T. + !> This implementation works directly on the factors without forming + !> the tridiagonal matrix T. The Sturm count is also the number of + !> eigenvalues of T less than sigma. + !> This routine is called from DLARRB. + !> The current routine does not use the PIVMIN parameter but rather + !> requires IEEE-754 propagation of Infinities and NaNs. This + !> routine also has no input range restrictions but does require + !> default exception handling such that x/0 produces Inf when x is + !> non-zero, and Inf/Inf produces NaN. For more information, see: + !> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in + !> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on + !> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 + !> (Tech report version in LAWN 172 with the same title.) + + pure integer(ilp) function stdlib_dlaneg( n, d, lld, sigma, pivmin, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, r + real(dp), intent(in) :: pivmin, sigma + ! Array Arguments + real(dp), intent(in) :: d(*), lld(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blklen = 128 + + ! some architectures propagate infinities and nans very slowly, so + ! the code computes counts in blklen chunks. then a nan can + ! propagate at most blklen columns before being detected. this is + ! not a general tuning parameter; it needs only to be just large + ! enough that the overhead is tiny in common cases. + + ! Local Scalars + integer(ilp) :: bj, j, neg1, neg2, negcnt + real(dp) :: bsav, dminus, dplus, gamma, p, t, tmp + logical(lk) :: sawnan + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + negcnt = 0 + ! i) upper part: l d l^t - sigma i = l+ d+ l+^t + t = -sigma + loop_210: do bj = 1, r-1, blklen + neg1 = 0 + bsav = t + do j = bj, min(bj+blklen-1, r-1) + dplus = d( j ) + t + if( dplus DLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(dp) function stdlib_dlangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(dp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value DLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real matrix A. + + real(dp) function stdlib_dlange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value DLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real tridiagonal matrix A. + + pure real(dp) function stdlib_dlangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_dlassq( n-1, dl, 1, scale, sum ) + call stdlib_dlassq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_dlangt = anorm + return + end function stdlib_dlangt + + !> DLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(dp) function stdlib_dlanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_dlassq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_dlanhs = value + return + end function stdlib_dlanhs + + !> DLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(dp) function stdlib_dlansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_dlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_dlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_dlassq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_dlansb = value + return + end function stdlib_dlansb + + !> DLANSF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A in RFP format. + + real(dp) function stdlib_dlansf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: a(0:*) + real(dp), intent(out) :: work(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(dp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_dlansf = zero + return + else if( n==1 ) then + stdlib_dlansf = abs( a(0) ) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='t or 't' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'T' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + do j = 0, k - 1 + do i = 0, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n + do j = 0, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + do j = 0, k - 1 + do i = 0, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n+1 + do j = 0, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + if( ifm==1 ) then + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( a( 0+j*lda ) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( a( i+j*lda ) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( a( i+j*lda ) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( a( 0+j*lda ) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( a( i+j*lda ) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( a( 0 ) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_dlassq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_dlassq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k-1, a( k ), lda+1, scale, s ) + ! tri l at a(k,0) + call stdlib_dlassq( k, a( k-1 ), lda+1, scale, s ) + ! tri u at a(k-1,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_dlassq( n-j-1, a( j+1+j*lda ), 1, scale, s ) + ! trap l at a(0,0) + end do + do j = 0, k - 2 + call stdlib_dlassq( j, a( 0+( 1+j )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( 0 ), lda+1, scale, s ) + ! tri l at a(0,0) + call stdlib_dlassq( k-1, a( 0+lda ), lda+1, scale, s ) + ! tri u at a(0,1) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 2 + call stdlib_dlassq( j, a( 0+( k+j )*lda ), 1, scale, s ) + ! u at a(0,k) + end do + do j = 0, k - 2 + call stdlib_dlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_dlassq( k-j-1, a( j+1+( j+k-1 )*lda ), 1,scale, s ) + ! l at a(0,k-1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k-1, a( 0+k*lda ), lda+1, scale, s ) + ! tri u at a(0,k) + call stdlib_dlassq( k, a( 0+( k-1 )*lda ), lda+1, scale, s ) + ! tri l at a(0,k-1) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_dlassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + do j = k, n - 1 + call stdlib_dlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,k) + end do + do j = 0, k - 3 + call stdlib_dlassq( k-j-2, a( j+2+j*lda ), 1, scale, s ) + ! l at a(1,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + call stdlib_dlassq( k-1, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 2 + call stdlib_dlassq( k-j-1, a( k+j+2+j*lda ), 1, scale, s ) + ! l at a(k+1,0) + end do + do j = 0, k - 1 + call stdlib_dlassq( k+j, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( k+1 ), lda+1, scale, s ) + ! tri l at a(k+1,0) + call stdlib_dlassq( k, a( k ), lda+1, scale, s ) + ! tri u at a(k,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_dlassq( n-j-1, a( j+2+j*lda ), 1, scale, s ) + ! trap l at a(1,0) + end do + do j = 1, k - 1 + call stdlib_dlassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + call stdlib_dlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 1 + call stdlib_dlassq( j, a( 0+( k+1+j )*lda ), 1, scale, s ) + ! u at a(0,k+1) + end do + do j = 0, k - 1 + call stdlib_dlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_dlassq( k-j-1, a( j+1+( j+k )*lda ), 1, scale,s ) + ! l at a(0,k) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( 0+( k+1 )*lda ), lda+1, scale, s ) + ! tri u at a(0,k+1) + call stdlib_dlassq( k, a( 0+k*lda ), lda+1, scale, s ) + ! tri l at a(0,k) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_dlassq( j, a( 0+( j+1 )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + do j = k + 1, n + call stdlib_dlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,k+1) + end do + do j = 0, k - 2 + call stdlib_dlassq( k-j-1, a( j+1+j*lda ), 1, scale, s ) + ! l at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_dlassq( k, a( lda ), lda+1, scale, s ) + ! tri l at a(0,1) + call stdlib_dlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + end if + end if + value = scale*sqrt( s ) + end if + stdlib_dlansf = value + return + end function stdlib_dlansf + + !> DLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A, supplied in packed form. + + real(dp) function stdlib_dlansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_dlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_dlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( ap( k )/=zero ) then + absa = abs( ap( k ) ) + if( scale DLANST: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric tridiagonal matrix A. + + pure real(dp) function stdlib_dlanst( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_dlassq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_dlassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_dlanst = anorm + return + end function stdlib_dlanst + + !> DLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A. + + real(dp) function stdlib_dlansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_dlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_dlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_dlassq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_dlansy = value + return + end function stdlib_dlansy + + !> DLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(dp) function stdlib_dlantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(dp), intent(in) :: ab(ldab,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_dlassq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_dlassq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_dlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_dlassq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_dlantb = value + return + end function stdlib_dlantb + + !> DLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(dp) function stdlib_dlantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_dlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_dlassq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_dlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_dlassq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_dlantp = value + return + end function stdlib_dlantp + + !> DLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(dp) function stdlib_dlantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_dlassq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_dlassq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_dlassq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_dlassq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_dlantr = value + return + end function stdlib_dlantr + + !> DLAORHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine DLAORHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_dlaorhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_dlaorhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_dlaorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_dtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_dlaorhr_col_getrfnp + + !> DLAPY2: returns sqrt(x**2+y**2), taking care not to cause unnecessary + !> overflow and unnecessary underflow. + + pure real(dp) function stdlib_dlapy2( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: x, y + ! ===================================================================== + + + ! Local Scalars + real(dp) :: w, xabs, yabs, z, hugeval + logical(lk) :: x_is_nan, y_is_nan + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + x_is_nan = stdlib_disnan( x ) + y_is_nan = stdlib_disnan( y ) + if ( x_is_nan ) stdlib_dlapy2 = x + if ( y_is_nan ) stdlib_dlapy2 = y + hugeval = stdlib_dlamch( 'OVERFLOW' ) + if ( .not.( x_is_nan.or.y_is_nan ) ) then + xabs = abs( x ) + yabs = abs( y ) + w = max( xabs, yabs ) + z = min( xabs, yabs ) + if( z==zero .or. w>hugeval ) then + stdlib_dlapy2 = w + else + stdlib_dlapy2 = w*sqrt( one+( z / w )**2 ) + end if + end if + return + end function stdlib_dlapy2 + + !> Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). + !> It is assumed that either + !> 1) sr1 = sr2 + !> or + !> 2) si = 0. + !> This is useful for starting double implicit shift bulges + !> in the QZ algorithm. + + pure subroutine stdlib_dlaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) + ! arguments + integer(ilp), intent( in ) :: lda, ldb + real(dp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2 + real(dp), intent( out ) :: v( * ) + + ! local scalars + real(dp) :: w(2), safmin, safmax, scale1, scale2 + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + ! calculate first shifted vector + w( 1 ) = beta1*a( 1, 1 )-sr1*b( 1, 1 ) + w( 2 ) = beta1*a( 2, 1 )-sr1*b( 2, 1 ) + scale1 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale1 >= safmin .and. scale1 <= safmax ) then + w( 1 ) = w( 1 )/scale1 + w( 2 ) = w( 2 )/scale1 + end if + ! solve linear system + w( 2 ) = w( 2 )/b( 2, 2 ) + w( 1 ) = ( w( 1 )-b( 1, 2 )*w( 2 ) )/b( 1, 1 ) + scale2 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale2 >= safmin .and. scale2 <= safmax ) then + w( 1 ) = w( 1 )/scale2 + w( 2 ) = w( 2 )/scale2 + end if + ! apply second shift + v( 1 ) = beta2*( a( 1, 1 )*w( 1 )+a( 1, 2 )*w( 2 ) )-sr2*( b( 1,1 )*w( 1 )+b( 1, 2 )*w(& + 2 ) ) + v( 2 ) = beta2*( a( 2, 1 )*w( 1 )+a( 2, 2 )*w( 2 ) )-sr2*( b( 2,1 )*w( 1 )+b( 2, 2 )*w(& + 2 ) ) + v( 3 ) = beta2*( a( 3, 1 )*w( 1 )+a( 3, 2 )*w( 2 ) )-sr2*( b( 3,1 )*w( 1 )+b( 3, 2 )*w(& + 2 ) ) + ! account for imaginary part + v( 1 ) = v( 1 )+si*si*b( 1, 1 )/scale1/scale2 + ! check for overflow + if( abs( v( 1 ) )>safmax .or. abs( v( 2 ) ) > safmax .or.abs( v( 3 ) )>safmax .or. & + stdlib_disnan( v( 1 ) ) .or.stdlib_disnan( v( 2 ) ) .or. stdlib_disnan( v( 3 ) ) ) & + then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + end if + end subroutine stdlib_dlaqz1 + + !> DLAQZ2: chases a 2x2 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_dlaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + ! local variables + real(dp) :: h(2,3), c1, s1, c2, s2, temp + if( k+2 == ihi ) then + ! shift is located on the edge of the matrix, remove it + h = b( ihi-1:ihi, ihi-2:ihi ) + ! make h upper triangular + call stdlib_dlartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_drot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + call stdlib_dlartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_drot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_dlartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + call stdlib_drot( ihi-istartm+1, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_drot( ihi-istartm+1, b( istartm, ihi-1 ), 1, b( istartm,ihi-2 ), 1, c2, & + s2 ) + b( ihi-1, ihi-2 ) = zero + b( ihi, ihi-2 ) = zero + call stdlib_drot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_drot( ihi-istartm+1, a( istartm, ihi-1 ), 1, a( istartm,ihi-2 ), 1, c2, & + s2 ) + if ( ilz ) then + call stdlib_drot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + call stdlib_drot( nz, z( 1, ihi-1-zstart+1 ), 1, z( 1,ihi-2-zstart+1 ), 1, c2, & + s2 ) + end if + call stdlib_dlartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) + a( ihi-1, ihi-2 ) = temp + a( ihi, ihi-2 ) = zero + call stdlib_drot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & + ) + call stdlib_drot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & + ) + if ( ilq ) then + call stdlib_drot( nq, q( 1, ihi-1-qstart+1 ), 1, q( 1, ihi-qstart+1 ), 1, c1, s1 & + ) + end if + call stdlib_dlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = zero + call stdlib_drot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, s1 ) + + call stdlib_drot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + if ( ilz ) then + call stdlib_drot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + end if + else + ! normal operation, move bulge down + h = b( k+1:k+2, k:k+2 ) + ! make h upper triangular + call stdlib_dlartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_drot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + ! calculate z1 and z2 + call stdlib_dlartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_drot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_dlartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + ! apply transformations from the right + call stdlib_drot( k+3-istartm+1, a( istartm, k+2 ), 1, a( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_drot( k+3-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c2, s2 ) + + call stdlib_drot( k+2-istartm+1, b( istartm, k+2 ), 1, b( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_drot( k+2-istartm+1, b( istartm, k+1 ), 1, b( istartm,k ), 1, c2, s2 ) + + if ( ilz ) then + call stdlib_drot( nz, z( 1, k+2-zstart+1 ), 1, z( 1, k+1-zstart+1 ), 1, c1, s1 ) + + call stdlib_drot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c2, s2 ) + + end if + b( k+1, k ) = zero + b( k+2, k ) = zero + ! calculate q1 and q2 + call stdlib_dlartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) + a( k+2, k ) = temp + a( k+3, k ) = zero + call stdlib_dlartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) + a( k+1, k ) = temp + a( k+2, k ) = zero + ! apply transformations from the left + call stdlib_drot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) + call stdlib_drot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) + call stdlib_drot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) + call stdlib_drot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) + if ( ilq ) then + call stdlib_drot( nq, q( 1, k+2-qstart+1 ), 1, q( 1, k+3-qstart+1 ), 1, c1, s1 ) + + call stdlib_drot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c2, s2 ) + + end if + end if + end subroutine stdlib_dlaqz2 + + !> DLAQZ4: Executes a single multishift QZ sweep + + pure subroutine stdlib_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, & + si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_desired, ldqc, ldzc + real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( & + ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * ) + integer(ilp), intent( out ) :: info + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(dp) :: temp, v(3), c1, s1, c2, s2, swap + info = 0 + if ( nblock_desired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_desired + return + else if ( lwork < n*nblock_desired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ4', -info ) + return + end if + ! executable statements + if ( nshifts < 2 ) then + return + end if + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ! shuffle shifts into pairs of real shifts and pairs + ! of complex conjugate shifts assuming complex + ! conjugate shifts are already adjacent to one + ! another + do i = 1, nshifts-2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + swap = ss( i ) + ss( i ) = ss( i+1 ) + ss( i+1 ) = ss( i+2 ) + ss( i+2 ) = swap + end if + end do + ! nshfts is supposed to be even, but if it is odd, + ! then simply reduce it by one. the shuffle above + ! ensures that the dropped shift is real and that + ! the remaining shifts are paired. + ns = nshifts-mod( nshifts, 2 ) + npos = max( nblock_desired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_dlaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) + call stdlib_dlaset( 'FULL', ns, ns, zero, one, zc, ldzc ) + do i = 1, ns, 2 + ! introduce the shift + call stdlib_dlaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & + i ), ss( i ), ss( i+1 ), v ) + temp = v( 2 ) + call stdlib_dlartg( temp, v( 3 ), c1, s1, v( 2 ) ) + call stdlib_dlartg( v( 1 ), v( 2 ), c2, s2, temp ) + call stdlib_drot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) + call stdlib_drot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) + call stdlib_drot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) + call stdlib_drot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) + call stdlib_drot( ns+1, qc( 1, 2 ), 1, qc( 1, 3 ), 1, c1, s1 ) + call stdlib_drot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c2, s2 ) + ! chase the shift down + do j = 1, ns-1-i + call stdlib_dlaqz2( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & + ), lda, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & + ), ldb, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_dgemm( 'N', 'N', n, sheight, sheight, one, q( 1, ilo ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_dlacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & + zc, ldzc, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & + zc, ldzc, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_dgemm( 'N', 'N', n, swidth, swidth, one, z( 1, ilo ), ldz,zc, ldzc, & + zero, work, n ) + call stdlib_dlacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_dlaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) + call stdlib_dlaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -2 + do j = 0, np-1 + ! move down the block with index k+i+j-1, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_dlaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& + ns+np ), lda, zero, work,sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& + ns+np ), ldb, zero, work,sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_dgemm( 'N', 'N', n, nblock, nblock, one, q( 1, k+1 ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_dlacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & + zc, ldzc, zero, work,sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & + zc, ldzc, zero, work,sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_dgemm( 'N', 'N', n, nblock, nblock, one, z( 1, k ),ldz, zc, ldzc, & + zero, work, n ) + call stdlib_dlacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_dlaset( 'FULL', ns, ns, zero, one, qc, ldqc ) + call stdlib_dlaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns, 2 + ! chase the shift down to the bottom right corner + do ishift = ihi-i-1, ihi-2 + call stdlib_dlaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_dgemm( 'N', 'N', n, ns, ns, one, q( 1, ihi-ns+1 ), ldq,qc, ldqc, zero, & + work, n ) + call stdlib_dlacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& + zc, ldzc, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& + zc, ldzc, zero, work, sheight ) + call stdlib_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_dgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1, ihi-ns ), ldz,zc, ldzc, zero,& + work, n ) + call stdlib_dlacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_dlaqz4 + + !> DLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_dlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(dp), intent(in) :: gaptol, lambda, pivmin + real(dp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(dp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(dp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + eps = stdlib_dlamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus DLARFG: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, and x is an (n-1)-element real + !> vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + !> Otherwise 1 <= tau <= 2. + + pure subroutine stdlib_dlarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(inout) :: alpha + real(dp), intent(out) :: tau + ! Array Arguments + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(dp) :: beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=1 ) then + tau = zero + return + end if + xnorm = stdlib_dnrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_dlapy2( alpha, xnorm ), alpha ) + safmin = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'E' ) + knt = 0 + if( abs( beta ) DLARFGP: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is non-negative, and x is + !> an (n-1)-element real vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + + subroutine stdlib_dlarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(inout) :: alpha + real(dp), intent(out) :: tau + ! Array Arguments + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(dp) :: beta, bignum, savealpha, smlnum, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_dnrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = [+/-1, 0; i], sign chosen so alpha >= 0 + if( alpha>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + alpha = -alpha + end if + else + ! general case + beta = sign( stdlib_dlapy2( alpha, xnorm ), alpha ) + smlnum = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'E' ) + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + beta = -savealpha + end if + else + ! this is the general case. + call stdlib_dscal( n-1, one / alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_dlarfgp + + !> DLARNV: returns a vector of n random real numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_dlarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(dp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(dp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_dp + + + + ! Local Scalars + integer(ilp) :: i, il, il2, iv + ! Local Arrays + real(dp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cos,log,min,sqrt + ! Executable Statements + do 40 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + if( idist==3 ) then + il2 = 2*il + else + il2 = il + end if + ! call stdlib_dlaruv to generate il2 numbers from a uniform (0,1) + ! distribution (il2 <= lv) + call stdlib_dlaruv( iseed, il2, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = u( i ) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = two*u( i ) - one + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*cos( twopi*u( 2*i ) ) + end do + end if + 40 continue + return + end subroutine stdlib_dlarnv + + !> Given the relatively robust representation(RRR) L D L^T, DLARRB: + !> does "limited" bisection to refine the eigenvalues of L D L^T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses and their gaps are input in WERR + !> and WGAP, respectively. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_dlarrb( n, d, lld, ifirst, ilast, rtol1,rtol2, offset, w, wgap, werr, & + work, iwork,pivmin, spdiam, twist, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset, twist + integer(ilp), intent(out) :: info + real(dp), intent(in) :: pivmin, rtol1, rtol2, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: d(*), lld(*) + real(dp), intent(inout) :: w(*), werr(*), wgap(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: i, i1, ii, ip, iter, k, negcnt, next, nint, olnint, prev, r + real(dp) :: back, cvrgd, gap, left, lgap, mid, mnwdth, rgap, right, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + mnwdth = two * pivmin + r = twist + if((r<1).or.(r>n)) r = n + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + rgap = wgap( i1-offset ) + loop_75: do i = i1, ilast + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + right = w( ii ) + werr( ii ) + lgap = rgap + rgap = wgap( ii ) + gap = min( lgap, rgap ) + ! make sure that [left,right] contains the desired eigenvalue + ! compute negcount from dstqds facto l+d+l+^t = l d l^t - left + ! do while( negcnt(left)>i-1 ) + back = werr( ii ) + 20 continue + negcnt = stdlib_dlaneg( n, d, lld, left, pivmin, r ) + if( negcnt>i-1 ) then + left = left - back + back = two*back + go to 20 + end if + ! do while( negcnt(right)=i1).and.(i<=ilast)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + nint = nint + 1 + iwork( k-1 ) = i + 1 + iwork( k ) = negcnt + end if + work( k-1 ) = left + work( k ) = right + end do loop_75 + ! do while( nint>0 ), i.e. there are still unconverged intervals + ! and while (iter1) lgap = wgap( ii-1 ) + gap = min( lgap, rgap ) + next = iwork( k-1 ) + left = work( k-1 ) + right = work( k ) + mid = half*( left + right ) + ! semiwidth of interval + width = right - mid + tmp = max( abs( left ), abs( right ) ) + cvrgd = max(rtol1*gap,rtol2*tmp) + if( ( width<=cvrgd ) .or. ( width<=mnwdth ).or.( iter==maxitr ) )then + ! reduce number of unconverged intervals + nint = nint - 1 + ! mark interval as converged. + iwork( k-1 ) = 0 + if( i1==i ) then + i1 = next + else + ! prev holds the last unconverged interval previously examined + if(prev>=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + negcnt = stdlib_dlaneg( n, d, lld, mid, pivmin, r ) + if( negcnt<=i-1 ) then + work( k-1 ) = mid + else + work( k ) = mid + end if + i = next + end do loop_100 + iter = iter + 1 + ! do another loop if there are still unconverged intervals + ! however, in the last iteration, all intervals are accepted + ! since this is the best we can do. + if( ( nint>0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = ifirst, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + do i = ifirst+1, ilast + k = 2*i + ii = i - offset + wgap( ii-1 ) = max( zero,w(ii) - werr (ii) - w( ii-1 ) - werr( ii-1 )) + end do + return + end subroutine stdlib_dlarrb + + !> Given the initial representation L D L^T and its cluster of close + !> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... + !> W( CLEND ), DLARRF: finds a new relatively robust representation + !> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the + !> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. + + pure subroutine stdlib_dlarrf( n, d, l, ld, clstrt, clend,w, wgap, werr,spdiam, clgapl, & + clgapr, pivmin, sigma,dplus, lplus, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: clstrt, clend, n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: clgapl, clgapr, pivmin, spdiam + real(dp), intent(out) :: sigma + ! Array Arguments + real(dp), intent(in) :: d(*), l(*), ld(*), w(*), werr(*) + real(dp), intent(out) :: dplus(*), lplus(*), work(*) + real(dp), intent(inout) :: wgap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: quart = 0.25_dp + real(dp), parameter :: maxgrowth1 = 8._dp + real(dp), parameter :: maxgrowth2 = 8._dp + integer(ilp), parameter :: ktrymax = 1 + integer(ilp), parameter :: sleft = 1 + integer(ilp), parameter :: sright = 2 + + ! Local Scalars + logical(lk) :: dorrr1, forcer, nofail, sawnan1, sawnan2, tryrrr1 + integer(ilp) :: i, indx, ktry, shift + real(dp) :: avgap, bestshift, clwdth, eps, fact, fail, fail2, growthbound, ldelta, & + ldmax, lsigma, max1, max2, mingap, oldp, prod, rdelta, rdmax, rrr1, rrr2, rsigma, s, & + smlgrowth, tmp, znm2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + fact = real(2**ktrymax,KIND=dp) + eps = stdlib_dlamch( 'PRECISION' ) + shift = 0 + forcer = .false. + ! note that we cannot guarantee that for any of the shifts tried, + ! the factorization has a small or even moderate element growth. + ! there could be ritz values at both ends of the cluster and despite + ! backing off, there are examples where all factorizations tried + ! (in ieee mode, allowing zero pivots + ! element growth. + ! for this reason, we should use pivmin in this subroutine so that at + ! least the l d l^t factorization exists. it can be checked afterwards + ! whether the element growth caused bad residuals/orthogonality. + ! decide whether the code should accept the best among all + ! representations despite large element growth or signal info=1 + ! setting nofail to .false. for quick fix for bug 113 + nofail = .false. + ! compute the average gap length of the cluster + clwdth = abs(w(clend)-w(clstrt)) + werr(clend) + werr(clstrt) + avgap = clwdth / real(clend-clstrt,KIND=dp) + mingap = min(clgapl, clgapr) + ! initial values for shifts to both ends of cluster + lsigma = min(w( clstrt ),w( clend )) - werr( clstrt ) + rsigma = max(w( clstrt ),w( clend )) + werr( clend ) + ! use a small fudge to make sure that we really shift to the outside + lsigma = lsigma - abs(lsigma)* four * eps + rsigma = rsigma + abs(rsigma)* four * eps + ! compute upper bounds for how much to back off the initial shifts + ldmax = quart * mingap + two * pivmin + rdmax = quart * mingap + two * pivmin + ldelta = max(avgap,wgap( clstrt ))/fact + rdelta = max(avgap,wgap( clend-1 ))/fact + ! initialize the record of the best representation found + s = stdlib_dlamch( 'S' ) + smlgrowth = one / s + fail = real(n-1,KIND=dp)*mingap/(spdiam*eps) + fail2 = real(n-1,KIND=dp)*mingap/(spdiam*sqrt(eps)) + bestshift = lsigma + ! while (ktry <= ktrymax) + ktry = 0 + growthbound = maxgrowth1*spdiam + 5 continue + sawnan1 = .false. + sawnan2 = .false. + ! ensure that we do not back off too much of the initial shifts + ldelta = min(ldmax,ldelta) + rdelta = min(rdmax,rdelta) + ! compute the element growth when shifting to both ends of the cluster + ! accept the shift if there is no element growth at one of the two ends + ! left end + s = -lsigma + dplus( 1 ) = d( 1 ) + s + if(abs(dplus(1)) DLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by DLARRE. + + pure subroutine stdlib_dlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: minrgp, pivmin, vl, vu + real(dp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(dp), intent(in) :: gers(*) + real(dp), intent(out) :: work(*) + real(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + real(dp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indwrk= 3*n+1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = one + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_dcopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + call stdlib_dcopy( in, z( ibegin, j ), 1, d( ibegin ), 1 ) + call stdlib_dcopy( in-1, z( ibegin, j+1 ), 1, l( ibegin ),1 ) + sigma = z( iend, j+1 ) + ! set the corresponding entries in z to zero + call stdlib_dlaset( 'FULL', in, 2, zero, zero,z( ibegin, j), ldz ) + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_dlarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_dlarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_dlarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_dlarrf needs lwork = 2*n + call stdlib_dlarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,z(ibegin, newftt),z(ibegin, newftt+1),work( indwrk ), & + iinfo ) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_dlarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = ssigma + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=dp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_dlarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_dlar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_dscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex DLASCL: multiplies the M by N real matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_dlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(dp), intent(in) :: cfrom, cto + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(dp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_disnan(cfrom) ) then + info = -4 + else if( stdlib_disnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower half of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper half of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_dlascl + + !> This subroutine computes the square root of the I-th updated + !> eigenvalue of a positive symmetric rank-one modification to + !> a positive diagonal matrix whose entries are given as the squares + !> of the corresponding entries in the array d, and that + !> 0 <= D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) * diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_dlasd4( n, i, d, z, delta, rho, sigma, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: rho + real(dp), intent(out) :: sigma + ! Array Arguments + real(dp), intent(in) :: d(*), z(*) + real(dp), intent(out) :: delta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 400 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3, geomavg + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(dp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & + dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & + temp, temp1, temp2, w + ! Local Arrays + real(dp) :: dd(3), zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + sigma = sqrt( d( 1 )*d( 1 )+rho*z( 1 )*z( 1 ) ) + delta( 1 ) = one + work( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_dlasd5( i, d, z, delta, rho, sigma, work ) + return + end if + ! compute machine epsilon + eps = stdlib_dlamch( 'EPSILON' ) + rhoinv = one / rho + tau2= zero + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + temp = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) + do j = 1, n + work( j ) = d( j ) + d( n ) + temp1 + delta( j ) = ( d( j )-d( n ) ) - temp1 + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& + *work( n ) ) + if( w<=zero ) then + temp1 = sqrt( d( n )*d( n )+rho ) + temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& + temp1 ) ) ) +z( n )*z( n ) / rho + ! the following tau2 is to approximate + ! sigma_n^2 - d( n )*d( n ) + if( c<=temp ) then + tau = rho + else + delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) + a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*delsq + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp>rho )eta = rho + dtnsq + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + ! main loop to update the values of the array delta + iter = niter + 1 + loop_90: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + ! calculate the new step + dtnsq1 = work( n-1 )*delta( n-1 ) + dtnsq = work( n )*delta( n ) + c = w - dtnsq1*dpsi - dtnsq*dphi + a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) + b = dtnsq1*dtnsq*w + if( a>=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp<=zero )eta = eta / two + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + end do loop_90 + ! return with info = 1, niter = maxit and not converged + info = 1 + go to 240 + ! end for the case i = n + else + ! the case for i < n + niter = 1 + ip1 = i + 1 + ! calculate initial guess + delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) + delsq2 = delsq / two + sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) + temp = delsq2 / ( d( i )+sq2 ) + do j = 1, n + work( j ) = d( j ) + d( i ) + temp + delta( j ) = ( d( j )-d( i ) ) - temp + end do + psi = zero + do j = 1, i - 1 + psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + phi = zero + do j = n, i + 2, -1 + phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + c = rhoinv + psi + phi + w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& + *delta( ip1 ) ) + geomavg = .false. + if( w>zero ) then + ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + ! we choose d(i) as origin. + orgati = .true. + ii = i + sglb = zero + sgub = delsq2 / ( d( i )+sq2 ) + a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*delsq + if( a>zero ) then + tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + ! tau2 now is an estimation of sigma^2 - d( i )^2. the + ! following, however, is the corresponding estimation of + ! sigma - d( i ). + tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) + temp = sqrt(eps) + if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then + tau = min( ten*d(i), sgub ) + geomavg = .true. + end if + else + ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + ! we choose d(i+1) as origin. + orgati = .false. + ii = ip1 + sglb = -delsq2 / ( d( ii )+sq2 ) + sgub = zero + a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*delsq + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / ( work( ii )*delta( ii ) ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & + iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp = tau + eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + swtch = .false. + if( orgati ) then + if( -w>abs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + ! main loop to update the values of the array delta and work + iter = niter + 1 + loop_230: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( .not.swtch ) then + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + else + temp = z( ii ) / ( work( ii )*delta( ii ) ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - dtisq*dpsi - dtipsq*dphi + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) + end if + else + a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - dtiim*dpsi - dtiip*dphi + zz( 1 ) = dtiim*dtiim*dpsi + zz( 3 ) = dtiip*dtiip*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 + c = temp - dtiip*( dpsi+dphi ) - temp2 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp=tau+eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_230 + ! return with info = 1, niter = maxit and not converged + info = 1 + end if + 240 continue + return + end subroutine stdlib_dlasd4 + + !> DLASD7: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. There + !> are two ways in which deflation can occur: when two or more singular + !> values are close together or if there is a tiny entry in the Z + !> vector. For each such occurrence the order of the related + !> secular equation problem is reduced by one. + !> DLASD7 is called from DLASD6. + + pure subroutine stdlib_dlasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & + beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(dp), intent(in) :: alpha, beta + real(dp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(dp), intent(inout) :: d(*), vf(*), vl(*) + real(dp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) + + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(dp) :: eps, hlftol, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcoln )go to 90 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_dlapy2( c, s ) + z( j ) = tau + z( jprev ) = zero + c = c / tau + s = -s / tau + ! record the appropriate givens rotation + if( icompq==1 ) then + givptr = givptr + 1 + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + givcol( givptr, 2 ) = idxjp + givcol( givptr, 1 ) = idxj + givnum( givptr, 2 ) = c + givnum( givptr, 1 ) = s + end if + call stdlib_drot( 1, vf( jprev ), 1, vf( j ), 1, c, s ) + call stdlib_drot( 1, vl( jprev ), 1, vl( j ), 1, c, s ) + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 80 + 90 continue + ! record the last singular value. + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 100 continue + ! sort the singular values into dsigma. the singular values which + ! were not deflated go into the first k slots of dsigma, except + ! that dsigma(1) is treated separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + vfw( j ) = vf( jp ) + vlw( j ) = vl( jp ) + end do + if( icompq==1 ) then + do j = 2, n + jp = idxp( j ) + perm( j ) = idxq( idx( jp )+1 ) + if( perm( j )<=nlp1 ) then + perm( j ) = perm( j ) - 1 + end if + end do + end if + ! the deflated singular values go back into the last n - k slots of + ! d. + call stdlib_dcopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and + ! vl(m). + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_dlapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = -z( m ) / z( 1 ) + end if + call stdlib_drot( 1, vf( m ), 1, vf( 1 ), 1, c, s ) + call stdlib_drot( 1, vl( m ), 1, vl( 1 ), 1, c, s ) + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! restore z, vf, and vl. + call stdlib_dcopy( k-1, zw( 2 ), 1, z( 2 ), 1 ) + call stdlib_dcopy( n-1, vfw( 2 ), 1, vf( 2 ), 1 ) + call stdlib_dcopy( n-1, vlw( 2 ), 1, vl( 2 ), 1 ) + return + end subroutine stdlib_dlasd7 + + !> DLASD8: finds the square roots of the roots of the secular equation, + !> as defined by the values in DSIGMA and Z. It makes the appropriate + !> calls to DLASD4, and stores, for each element in D, the distance + !> to its two nearest poles (elements in DSIGMA). It also updates + !> the arrays VF and VL, the first and last components of all the + !> right singular vectors of the original bidiagonal matrix. + !> DLASD8 is called from DLASD6. + + pure subroutine stdlib_dlasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, k, lddifr + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) + real(dp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j + real(dp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( k<1 ) then + info = -2 + else if( lddifr DLASQ3: checks for deflation, computes a shift (TAU) and calls dqds. + !> In case of failure it changes shifts, and tries again until output + !> is positive. + + pure subroutine stdlib_dlasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0 + integer(ilp), intent(inout) :: iter, n0, ndiv, nfail, pp + real(dp), intent(inout) :: desig, dmin1, dmin2, dn, dn1, dn2, g, qmax, tau + real(dp), intent(out) :: dmin, sigma + ! Array Arguments + real(dp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: cbias = 1.50_dp + real(dp), parameter :: qurtr = 0.250_dp + real(dp), parameter :: hundrd = 100.0_dp + + + ! Local Scalars + integer(ilp) :: ipn4, j4, n0in, nn + integer(ilp), intent(inout) :: ttype + real(dp) :: eps, s, t, temp, tol, tol2 + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + n0in = n0 + eps = stdlib_dlamch( 'PRECISION' ) + tol = eps*hundrd + tol2 = tol**2 + ! check for deflation. + 10 continue + if( n0tol2*( sigma+z( nn-3 ) ) .and.z( nn-2*pp-4 )>tol2*z( nn-7 ) )go to & + 30 + 20 continue + z( 4*n0-3 ) = z( 4*n0+pp-3 ) + sigma + n0 = n0 - 1 + go to 10 + ! check whether e(n0-2) is negligible, 2 eigenvalues. + 30 continue + if( z( nn-9 )>tol2*sigma .and.z( nn-2*pp-8 )>tol2*z( nn-11 ) )go to 50 + 40 continue + if( z( nn-3 )>z( nn-7 ) ) then + s = z( nn-3 ) + z( nn-3 ) = z( nn-7 ) + z( nn-7 ) = s + end if + t = half*( ( z( nn-7 )-z( nn-3 ) )+z( nn-5 ) ) + if( z( nn-5 )>z( nn-3 )*tol2.and.t/=zero ) then + s = z( nn-3 )*( z( nn-5 ) / t ) + if( s<=t ) then + s = z( nn-3 )*( z( nn-5 ) /( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( nn-3 )*( z( nn-5 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( nn-7 ) + ( s+z( nn-5 ) ) + z( nn-3 ) = z( nn-3 )*( z( nn-7 ) / t ) + z( nn-7 ) = t + end if + z( 4*n0-7 ) = z( nn-7 ) + sigma + z( 4*n0-3 ) = z( nn-3 ) + sigma + n0 = n0 - 2 + go to 10 + 50 continue + if( pp==2 )pp = 0 + ! reverse the qd-array, if warranted. + if( dmin<=zero .or. n0 0. + 70 continue + call stdlib_dlasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2, dn,dn1, dn2, ieee, & + eps ) + ndiv = ndiv + ( n0-i0+2 ) + iter = iter + 1 + ! check status. + if( dmin>=zero .and. dmin1>=zero ) then + ! success. + go to 90 + else if( dminzero .and.z( 4*( n0-1 )-pp )

0 )info = ierr + call stdlib_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( is, jsp1 ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( is, jsp1 ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_daxpy( js-1, rhs( 1 ), b( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_daxpy( js-1, rhs( 2 ), b( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + call stdlib_daxpy( js-1, rhs( 3 ), e( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_daxpy( js-1, rhs( 4 ), e( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + end if + if( i

0 )info = ierr + call stdlib_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( isp1, js ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( isp1, js ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_dger( mb, js-1, one, rhs( 1 ), 1, b( 1, js ),1, f( is, 1 ), & + ldf ) + call stdlib_dger( mb, js-1, one, rhs( 3 ), 1, e( 1, js ),1, f( is, 1 ), & + ldf ) + end if + if( i

0 )info = ierr + call stdlib_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + k = 1 + ii = mb*nb + 1 + do jj = 0, nb - 1 + call stdlib_dcopy( mb, rhs( k ), 1, c( is, js+jj ), 1 ) + call stdlib_dcopy( mb, rhs( ii ), 1, f( is, js+jj ), 1 ) + k = k + mb + ii = ii + mb + end do + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_dgemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1, & + js ), ldb, one,f( is, 1 ), ldf ) + call stdlib_dgemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1, & + js ), lde, one,f( is, 1 ), ldf ) + end if + if( i

DTGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with real entries. (A, D) and (B, E) must be in + !> generalized (real) Schur canonical form, i.e. A, B are upper quasi + !> triangular and D, E are upper triangular. + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale b, where + !> Z is defined as + !> Z = [ kron(In, A) -kron(B**T, Im) ] (2) + !> [ kron(In, D) -kron(E**T, Im) ]. + !> Here Ik is the identity matrix of size k and X**T is the transpose of + !> X. kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'T', DTGSYL solves the transposed system Z**T*y = scale*b, + !> which is equivalent to solve for R and L in + !> A**T * R + D**T * L = scale * C (3) + !> R * B**T + L * E**T = scale * -F + !> This case (TRANS = 'T') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using DLACON. + !> If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate + !> of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. See [1-2] for more + !> information. + !> This is a level 3 BLAS algorithm. + + pure subroutine stdlib_dtgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + real(dp), intent(inout) :: c(ldc,*), f(ldf,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_dcopy by calls to stdlib_dlaset. + ! sven hammarling, 1/5/02. + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, ppqq, pq, q + real(dp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: real,max,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_dlaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_dlaset( 'F', m, n, zero, zero, f, ldf ) + else if( ijob>=1 ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + loop_30: do iround = 1, isolve + ! use unblocked level 2 solver + dscale = zero + dsum = one + pq = 0 + call stdlib_dtgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,iwork, pq, info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_dlacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_dlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_dlaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_dlaset( 'F', m, n, zero, zero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_dlacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_dlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + if( a( i, i-1 )/=zero )i = i + 1 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + if( b( j, j-1 )/=zero )j = j + 1 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j)-subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1,..., 1; j = 1, 2,..., q + dscale = zero + dsum = one + pq = 0 + scale = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + ppqq = 0 + call stdlib_dtgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo ) + if( linfo>0 )info = linfo + pq = pq + ppqq + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_dscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_dscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_dscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_dgemm( 'N', 'N', is-1, nb, mb, -one,a( 1, is ), lda, c( is, & + js ), ldc, one,c( 1, js ), ldc ) + call stdlib_dgemm( 'N', 'N', is-1, nb, mb, -one,d( 1, is ), ldd, c( is, & + js ), ldc, one,f( 1, js ), ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_dscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_dscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_dscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_dscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_dscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining equation. + if( j>p+2 ) then + call stdlib_dgemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1, js )& + , ldb, one, f( is, 1 ),ldf ) + call stdlib_dgemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1, js )& + , lde, one, f( is, 1 ),ldf ) + end if + if( i

DTPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_dtpcon( norm, uplo, diag, n, ap, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: ap(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_dlamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=dp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_dlantp( norm, uplo, diag, n, ap, work ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_dlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_dlatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_dlatps( uplo, 'TRANSPOSE', diag, normin, n, ap,work, scale, work( & + 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_idamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale DTPLQT2: computes a LQ a factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_dtplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda DTPMQRT applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_dtpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + real(dp), intent(in) :: v(ldv,*), t(ldt,*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_dtprfb( 'L', 'T', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_dtprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_dtprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_dtprfb( 'R', 'T', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_dtpmlqt + + !> DTPMQRT: applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_dtpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + real(dp), intent(in) :: v(ldv,*), t(ldt,*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_dtprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_dtprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_dtprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_dtprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_dtpmqrt + + !> DTPQRT2: computes a QR factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_dtpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda DTRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_dtrcon( norm, uplo, diag, n, a, lda, rcond, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_dlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_dlatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_dlatrs( uplo, 'TRANSPOSE', diag, normin, n, a, lda,work, scale, & + work( 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_idamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale DTZRZF: reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A + !> to upper triangular form by means of orthogonal transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_dtzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_dlarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_dlatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_dtzrzf + + !> DGBSV: computes the solution to a real system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_dgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb DGBSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_dgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*), c(*), r(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(dp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_dlantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_dlangb( norm, n, kl, ku, ab, ldab, work ) + rpvgrw = stdlib_dlantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_dlangb( 'M', n, kl, ku, ab, ldab, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_dgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, iwork, info ) + + ! compute the solution matrix x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_dgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DGEBAL: balances a general real matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_dgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: scale(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sclfac = 2.0e+0_dp + real(dp), parameter :: factor = 0.95e+0_dp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(dp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + if( stdlib_disnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'DGEBAL', -info ) + return + end if + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_dscal( n-k+1, g, a( i, k ), lda ) + call stdlib_dscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_dgebal + + !> DGEBD2: reduces a real general m by n matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_dgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + call stdlib_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + a( i, i ) = one + ! apply h(i) to a(i:m,i+1:n) from the left + if( i DGEHD2: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_dgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda DGELQ2: computes an LQ factorization of a real m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_dgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGELQF: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_dgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb DGELQT3: recursively computes a LQ factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_dgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_dlarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_dgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = q1^h a(j1:m,1:n) [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_dtrmm( 'R', 'U', 'T', 'U', m2, m1, one,a, lda, t( i1, 1 ), ldt ) + call stdlib_dgemm( 'N', 'T', m2, m1, n-m1, one, a( i1, i1 ), lda,a( 1, i1 ), lda, & + one, t( i1, 1 ), ldt) + call stdlib_dtrmm( 'R', 'U', 'N', 'N', m2, m1, one,t, ldt, t( i1, 1 ), ldt ) + call stdlib_dgemm( 'N', 'N', m2, n-m1, m1, -one, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + one, a( i1, i1 ), lda ) + call stdlib_dtrmm( 'R', 'U', 'N', 'U', m2, m1 , one,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )=0 + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_dgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_dtrmm( 'R', 'U', 'T', 'U', m1, m2, one,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_dgemm( 'N', 'T', m1, m2, n-m, one, a( 1, j1 ), lda,a( i1, j1 ), lda, & + one, t( 1, i1 ), ldt ) + call stdlib_dtrmm( 'L', 'U', 'N', 'N', m1, m2, -one, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_dtrmm( 'R', 'U', 'N', 'N', m1, m2, one,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_dgelqt3 + + !> DGEQL2: computes a QL factorization of a real m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_dgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQLF: computes a QL factorization of a real M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_dgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**t to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_dlarfb( 'LEFT', 'TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-1, & + n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_dgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_dgeqlf + + !> DGEQR2: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_dgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQR2P: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_dgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQRF: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_dgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb DGEQR2P computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_dgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb DGEQRT2: computes a QR factorization of a real M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_dgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i DGEQRT3: recursively computes a QR factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_dgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_dlarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_dgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_dtrmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + call stdlib_dgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1 ), lda,a( j1, j1 ), lda, & + one, t( 1, j1 ), ldt) + call stdlib_dtrmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1, j1 ), ldt ) + call stdlib_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + one, a( j1, j1 ), lda ) + call stdlib_dtrmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_dgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = (a( j+n1, i )) + end do + end do + call stdlib_dtrmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_dgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1 ), lda,a( i1, j1 ), lda, & + one, t( 1, j1 ), ldt ) + call stdlib_dtrmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_dtrmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_dgeqrt3 + + !> DGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_dgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_dgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + call stdlib_daxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dgetrs( transt, n, 1, af, ldaf, ipiv, work( n+1 ),n, info ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_dgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_dgerfs + + !> DGERQ2: computes an RQ factorization of a real m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_dgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGERQF: computes an RQ factorization of a real M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_dgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_dlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_dlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_dgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_dgerqf + + !> DGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_dgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_dgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_dgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_dlaswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_dlaswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_dtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_dgetrf + + !> DGGHD3: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then DGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of DGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_dgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(dp) :: c, c1, c2, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = real( lwkopt,KIND=dp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_dlaset( 'LOWER', n-1, n-1, zero, zero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = one + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_dlaset( 'ALL', nblst, nblst, zero, one, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_dlaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_dlartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = c + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + c = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = c*temp - s*b( i-1, jj ) + b( i-1, jj ) = s*temp + c*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + call stdlib_drot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, a( & + j+1+i, j ),-b( j+1+i, j ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated orthogonal + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_dgemv( 'TRANSPOSE', nblst, len, one, work,nblst, a( jrow, j+1 )& + , 1, zero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_dtrmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_dgemv( 'TRANSPOSE', len, nblst-len, one,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, one,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated orthogonal + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_dtrmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_dtrmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_dgemv( 'TRANSPOSE', nnb, len, one,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,one, work( pw ), 1 ) + call stdlib_dgemv( 'TRANSPOSE', len, nnb, one,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, one,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated orthogonal matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', nblst,cola, nblst, one, work, & + nblst,a( j, jcol+nnb ), lda, zero, work( pw ),nblst ) + call stdlib_dlacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_dorm22( 'LEFT', 'TRANSPOSE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, one, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, zero, work( pw ),2*nnb ) + call stdlib_dlacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated orthogonal matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, q( & + topq, j ), ldq,work, nblst, zero, work( pw ), nh ) + call stdlib_dlacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_dorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + q( topq, j ), ldq,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_dlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_dlaset( 'ALL', nblst, nblst, zero, one, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_dlaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_dlaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,a( jcol + 2, & + jcol ), lda ) + call stdlib_dlaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated orthogonal matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, a( & + 1, j ), lda,work, nblst, zero, work( pw ), top ) + call stdlib_dlacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_dorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, a( 1, j ), lda,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_dlacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, b( & + 1, j ), ldb,work, nblst, zero, work( pw ), top ) + call stdlib_dlacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_dorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, b( 1, j ), ldb,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_dlacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated orthogonal matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, z( & + topq, j ), ldz,work, nblst, zero, work( pw ), nh ) + call stdlib_dlacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_dorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + z( topq, j ), ldz,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_dlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol DGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**T*(inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_dggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'DGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'DGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'DORMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda DGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**T + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_dggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'DGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'DGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'DORMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DGSVJ0: is called from DGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_dgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(dp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) + real(dp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(dp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,real,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_dnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_dnrm2 is available, the if-then-else + ! below should read "aapp = stdlib_dnrm2( m, a(1,p), 1 ) * d(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_dnrm2( m, a( 1, p ), 1 )*d( p ) + else + temp1 = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*d( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( d( q )>=one ) then + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_daxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( d( p )>=d( q ) ) then + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_daxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_dcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, one, m,1, work, lda, & + ierr ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_daxpy( m, temp1, work, 1,a( 1, q ), 1 ) + call stdlib_dlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_dlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ........................................................ + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! -#- m x 2 jacobi svd -#- + ! -#- safe gram matrix computation -#- + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_daxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_daxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_dcopy( m, a( 1, p ), 1, work,1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_daxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_dcopy( m, a( 1, q ), 1, work,1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_daxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_dlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_dnrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of iterations. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given tolerance, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d. + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_dswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_dswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_dgsvj0 + + !> DGSVJ1: is called from DGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> DGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_dgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) + real(dp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & + mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & + thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(dp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,real,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! Safe Gram Matrix Computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_dcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_ddot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq) / aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_daxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_daxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_dcopy( m, a( 1, p ), 1, work,1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_daxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_dcopy( m, a( 1, q ), 1, work,1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_daxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_dlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + ! if ( notrot >= emptsw ) go to 2011 + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapp= emptsw ) go to 2011 + end if + end do loop_2100 + ! end of the p-loop + end do loop_2010 + ! end of the jbc-loop + 2011 continue + ! 2011 bailed out of the jbc-loop + do p = igl, min( igl+kbl-1, n ) + sva( p ) = abs( sva( p ) ) + end do + ! ** if ( notrot >= emptsw ) go to 1994 + end do loop_2000 + ! 2000 :: end of the ibr-loop + ! .. update sva(n) + if( ( sva( n )rootsfmin ) )then + sva( n ) = stdlib_dnrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of sweeps. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given threshold, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_dswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_dswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_dgsvj1 + + !> DGTCON: estimates the reciprocal of the condition number of a real + !> tridiagonal matrix A using the LU factorization as computed by + !> DGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_dgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm DGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_dgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + real(dp), intent(out) :: berr(*), ferr(*), work(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_dgttrs( trans, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, info ) + + call stdlib_daxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_dlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_dgttrs( transt, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_dgttrs( transn, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_dgtrfs + + !> DGTSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B or A**T * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_dgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + real(dp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_dcopy( n-1, dl, 1, dlf, 1 ) + call stdlib_dcopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_dgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_dlangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_dgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,iwork, info ) + + ! compute the solution vectors x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_dgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DHGEQZ: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by DGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_dhgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alphar, alphai, & + beta, q, ldq, z, ldz, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), work(*) + real(dp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: safety = 1.0e+2_dp + ! $ safety = one ) + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilpivt, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(dp) :: a11, a12, a1i, a1r, a21, a22, a2i, a2r, ad11, ad11l, ad12, ad12l, ad21, & + ad21l, ad22, ad22l, ad32l, an, anorm, ascale, atol, b11, b1a, b1i, b1r, b22, b2a, b2i, & + b2r, bn, bnorm, bscale, btol, c, c11i, c11r, c12, c21, c22i, c22r, cl, cq, cr, cz, & + eshift, s, s1, s1inv, s2, safmax, safmin, scale, sl, sqi, sqr, sr, szi, szr, t1, tau, & + temp, temp2, tempi, tempr, u1, u12, u12l, u2, ulp, vs, w11, w12, w21, w22, wabs, wi, & + wr, wr2 + ! Local Arrays + real(dp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 80 + else + ifirst = jch + 1 + go to 110 + end if + end if + t( jch+1, jch+1 ) = zero + end do + go to 70 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + temp = t( jch, jch+1 ) + call stdlib_dlartg( temp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = zero + if( jchilast )ifrstm = ilo + end if + go to 350 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 110 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute single shifts. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10==iiter ) then + ! exceptional shift. chosen for no particularly good reason. + ! (single shift only.) + if( ( real( maxit,KIND=dp)*safmin )*abs( h( ilast, ilast-1 ) ) abs( (wr2/s2)*t( & + ilast, ilast )- h( ilast, ilast ) ) ) then + temp = wr + wr = wr2 + wr2 = temp + temp = s1 + s1 = s2 + s2 = temp + end if + temp = max( s1, safmin*max( one, abs( wr ), abs( wi ) ) ) + if( wi/=zero )go to 200 + end if + ! fiddle with shift to avoid overflow + temp = min( ascale, one )*( half*safmax ) + if( s1>temp ) then + scale = temp / s1 + else + scale = one + end if + temp = min( bscale, one )*( half*safmax ) + if( abs( wr )>temp )scale = min( scale, temp / abs( wr ) ) + s1 = scale*s1 + wr = scale*wr + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + temp = abs( s1*h( j, j-1 ) ) + temp2 = abs( s1*h( j, j )-wr*t( j, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + temp = h( j, j-1 ) + call stdlib_dlartg( temp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = zero + end if + do jc = j, ilastm + temp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -s*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = temp + temp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -s*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = temp2 + end do + if( ilq ) then + do jr = 1, n + temp = c*q( jr, j ) + s*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = temp + end do + end if + temp = t( j+1, j+1 ) + call stdlib_dlartg( temp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = zero + do jr = ifrstm, min( j+2, ilast ) + temp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -s*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = temp + end do + do jr = ifrstm, j + temp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -s*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = temp + end do + if( ilz ) then + do jr = 1, n + temp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -s*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = temp + end do + end if + end do loop_190 + go to 350 + ! use francis double-shift + ! note: the francis double-shift should work with real shifts, + ! but only if the block is at least 3x3. + ! this code may break if this point is reached with + ! a 2x2 block with real eigenvalues. + 200 continue + if( ifirst+1==ilast ) then + ! special case -- 2x2 block with complex eigenvectors + ! step 1: standardize, that is, rotate so that + ! ( b11 0 ) + ! b = ( ) with b11 non-negative. + ! ( 0 b22 ) + call stdlib_dlasv2( t( ilast-1, ilast-1 ), t( ilast-1, ilast ),t( ilast, ilast ),& + b22, b11, sr, cr, sl, cl ) + if( b11 unfl ) + ! __ + ! (sa - wb) ( cz -sz ) + ! ( sz cz ) + c11r = s1*a11 - wr*b11 + c11i = -wi*b11 + c12 = s1*a12 + c21 = s1*a21 + c22r = s1*a22 - wr*b22 + c22i = -wi*b22 + if( abs( c11r )+abs( c11i )+abs( c12 )>abs( c21 )+abs( c22r )+abs( c22i ) ) & + then + t1 = stdlib_dlapy3( c12, c11r, c11i ) + cz = c12 / t1 + szr = -c11r / t1 + szi = -c11i / t1 + else + cz = stdlib_dlapy2( c22r, c22i ) + if( cz<=safmin ) then + cz = zero + szr = one + szi = zero + else + tempr = c22r / cz + tempi = c22i / cz + t1 = stdlib_dlapy2( cz, c21 ) + cz = cz / t1 + szr = -c21*tempr / t1 + szi = c21*tempi / t1 + end if + end if + ! compute givens rotation on left + ! ( cq sq ) + ! ( __ ) a or b + ! ( -sq cq ) + an = abs( a11 ) + abs( a12 ) + abs( a21 ) + abs( a22 ) + bn = abs( b11 ) + abs( b22 ) + wabs = abs( wr ) + abs( wi ) + if( s1*an>wabs*bn ) then + cq = cz*b11 + sqr = szr*b22 + sqi = -szi*b22 + else + a1r = cz*a11 + szr*a12 + a1i = szi*a12 + a2r = cz*a21 + szr*a22 + a2i = szi*a22 + cq = stdlib_dlapy2( a1r, a1i ) + if( cq<=safmin ) then + cq = zero + sqr = one + sqi = zero + else + tempr = a1r / cq + tempi = a1i / cq + sqr = tempr*a2r + tempi*a2i + sqi = tempi*a2r - tempr*a2i + end if + end if + t1 = stdlib_dlapy3( cq, sqr, sqi ) + cq = cq / t1 + sqr = sqr / t1 + sqi = sqi / t1 + ! compute diagonal elements of qbz + tempr = sqr*szr - sqi*szi + tempi = sqr*szi + sqi*szr + b1r = cq*cz*b11 + tempr*b22 + b1i = tempi*b22 + b1a = stdlib_dlapy2( b1r, b1i ) + b2r = cq*cz*b22 + tempr*b11 + b2i = -tempi*b11 + b2a = stdlib_dlapy2( b2r, b2i ) + ! normalize so beta > 0, and im( alpha1 ) > 0 + beta( ilast-1 ) = b1a + beta( ilast ) = b2a + alphar( ilast-1 ) = ( wr*b1a )*s1inv + alphai( ilast-1 ) = ( wi*b1a )*s1inv + alphar( ilast ) = ( wr*b2a )*s1inv + alphai( ilast ) = -( wi*b2a )*s1inv + ! step 3: go to next block -- exit if finished. + ilast = ifirst - 1 + if( ilastilast )ifrstm = ilo + end if + go to 350 + else + ! usual case: 3x3 or larger block, using francis implicit + ! double-shift + ! 2 + ! eigenvalue equation is w - c w + d = 0, + ! -1 2 -1 + ! so compute 1st column of (a b ) - c a b + d + ! using the formula in qzit (from eispack) + ! we assume that the block is at least 3x3 + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + u12 = t( ilast-1, ilast ) / t( ilast, ilast ) + ad11l = ( ascale*h( ifirst, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad21l = ( ascale*h( ifirst+1, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad12l = ( ascale*h( ifirst, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad22l = ( ascale*h( ifirst+1, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad32l = ( ascale*h( ifirst+2, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + u12l = t( ifirst, ifirst+1 ) / t( ifirst+1, ifirst+1 ) + v( 1 ) = ( ad11-ad11l )*( ad22-ad11l ) - ad12*ad21 +ad21*u12*ad11l + ( ad12l-& + ad11l*u12l )*ad21l + v( 2 ) = ( ( ad22l-ad11l )-ad21l*u12l-( ad11-ad11l )-( ad22-ad11l )+ad21*u12 )& + *ad21l + v( 3 ) = ad32l*ad21l + istart = ifirst + call stdlib_dlarfg( 3, v( 1 ), v( 2 ), 1, tau ) + v( 1 ) = one + ! sweep + loop_290: do j = istart, ilast - 2 + ! all but last elements: use 3x3 householder transforms. + ! zero (j-1)st column of a + if( j>istart ) then + v( 1 ) = h( j, j-1 ) + v( 2 ) = h( j+1, j-1 ) + v( 3 ) = h( j+2, j-1 ) + call stdlib_dlarfg( 3, h( j, j-1 ), v( 2 ), 1, tau ) + v( 1 ) = one + h( j+1, j-1 ) = zero + h( j+2, j-1 ) = zero + end if + do jc = j, ilastm + temp = tau*( h( j, jc )+v( 2 )*h( j+1, jc )+v( 3 )*h( j+2, jc ) ) + h( j, jc ) = h( j, jc ) - temp + h( j+1, jc ) = h( j+1, jc ) - temp*v( 2 ) + h( j+2, jc ) = h( j+2, jc ) - temp*v( 3 ) + temp2 = tau*( t( j, jc )+v( 2 )*t( j+1, jc )+v( 3 )*t( j+2, jc ) ) + t( j, jc ) = t( j, jc ) - temp2 + t( j+1, jc ) = t( j+1, jc ) - temp2*v( 2 ) + t( j+2, jc ) = t( j+2, jc ) - temp2*v( 3 ) + end do + if( ilq ) then + do jr = 1, n + temp = tau*( q( jr, j )+v( 2 )*q( jr, j+1 )+v( 3 )*q( jr, j+2 ) ) + + q( jr, j ) = q( jr, j ) - temp + q( jr, j+1 ) = q( jr, j+1 ) - temp*v( 2 ) + q( jr, j+2 ) = q( jr, j+2 ) - temp*v( 3 ) + end do + end if + ! zero j-th column of b (see dlagbc for details) + ! swap rows to pivot + ilpivt = .false. + temp = max( abs( t( j+1, j+1 ) ), abs( t( j+1, j+2 ) ) ) + temp2 = max( abs( t( j+2, j+1 ) ), abs( t( j+2, j+2 ) ) ) + if( max( temp, temp2 )=temp2 ) then + w11 = t( j+1, j+1 ) + w21 = t( j+2, j+1 ) + w12 = t( j+1, j+2 ) + w22 = t( j+2, j+2 ) + u1 = t( j+1, j ) + u2 = t( j+2, j ) + else + w21 = t( j+1, j+1 ) + w11 = t( j+2, j+1 ) + w22 = t( j+1, j+2 ) + w12 = t( j+2, j+2 ) + u2 = t( j+1, j ) + u1 = t( j+2, j ) + end if + ! swap columns if nec. + if( abs( w12 )>abs( w11 ) ) then + ilpivt = .true. + temp = w12 + temp2 = w22 + w12 = w11 + w22 = w21 + w11 = temp + w21 = temp2 + end if + ! lu-factor + temp = w21 / w11 + u2 = u2 - temp*u1 + w22 = w22 - temp*w12 + w21 = zero + ! compute scale + scale = one + if( abs( w22 ) DLABRD: reduces the first NB rows and columns of a real general + !> m by n matrix A to upper or lower bidiagonal form by an orthogonal + !> transformation Q**T * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by DGEBRD + + pure subroutine stdlib_dlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_dgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, a( i, 1 ),lda, y( i, 1 ), & + ldy, one, a( i, i ), 1 ) + call stdlib_dgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, x( i, 1 ),ldx, a( 1, i ), 1,& + one, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + call stdlib_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + if( i DLADIV: performs complex division in real arithmetic + !> a + i*b + !> p + i*q = --------- + !> c + i*d + !> The algorithm is due to Michael Baudin and Robert L. Smith + !> and can be found in the paper + !> "A Robust Complex Division in Scilab" + + pure subroutine stdlib_dladiv( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: a, b, c, d + real(dp), intent(out) :: p, q + ! ===================================================================== + ! Parameters + real(dp), parameter :: bs = 2.0_dp + + + + ! Local Scalars + real(dp) :: aa, bb, cc, dd, ab, cd, s, ov, un, be, eps + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + aa = a + bb = b + cc = c + dd = d + ab = max( abs(a), abs(b) ) + cd = max( abs(c), abs(d) ) + s = one + ov = stdlib_dlamch( 'OVERFLOW THRESHOLD' ) + un = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'EPSILON' ) + be = bs / (eps*eps) + if( ab >= half*ov ) then + aa = half * aa + bb = half * bb + s = two * s + end if + if( cd >= half*ov ) then + cc = half * cc + dd = half * dd + s = half * s + end if + if( ab <= un*bs/eps ) then + aa = aa * be + bb = bb * be + s = s / be + end if + if( cd <= un*bs/eps ) then + cc = cc * be + dd = dd * be + s = s * be + end if + if( abs( d )<=abs( c ) ) then + call stdlib_dladiv1(aa, bb, cc, dd, p, q) + else + call stdlib_dladiv1(bb, aa, dd, cc, p, q) + q = -q + end if + p = p * s + q = q * s + return + end subroutine stdlib_dladiv + + !> This subroutine computes the I-th updated eigenvalue of a symmetric + !> rank-one modification to a diagonal matrix whose elements are + !> given in the array d, and that + !> D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_dlaed4( n, i, d, z, delta, rho, dlam, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: dlam + real(dp), intent(in) :: rho + ! Array Arguments + real(dp), intent(in) :: d(*), z(*) + real(dp), intent(out) :: delta(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3 + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(dp) :: a, b, c, del, dltlb, dltub, dphi, dpsi, dw, eps, erretm, eta, midpt, phi, & + prew, psi, rhoinv, tau, temp, temp1, w + ! Local Arrays + real(dp) :: zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + dlam = d( 1 ) + rho*z( 1 )*z( 1 ) + delta( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_dlaed5( i, d, z, delta, rho, dlam ) + return + end if + ! compute machine epsilon + eps = stdlib_dlamch( 'EPSILON' ) + rhoinv = one / rho + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + midpt = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + do j = 1, n + delta( j ) = ( d( j )-d( i ) ) - midpt + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / delta( j ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / delta( ii ) +z( n )*z( n ) / delta( n ) + if( w<=zero ) then + temp = z( n-1 )*z( n-1 ) / ( d( n )-d( n-1 )+rho ) +z( n )*z( n ) / rho + if( c<=temp ) then + tau = rho + else + del = d( n ) - d( n-1 ) + a = -c*del + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*del + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. temp=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. tempzero ) then + ! d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + ! we choose d(i) as origin. + orgati = .true. + a = c*del + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*del + if( a>zero ) then + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + dltlb = zero + dltub = midpt + else + ! (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + ! we choose d(i+1) as origin. + orgati = .false. + a = c*del - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*del + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / delta( ii ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv +three*abs( temp ) + abs( tau )& + *dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + if( orgati ) then + c = w - delta( ip1 )*dw - ( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + delta( i )*delta( i )*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )*temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + zz( 2 ) = z( ii )*z( ii ) + call stdlib_dlaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempabs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + tau = tau + eta + ! main loop to update the values of the array delta + iter = niter + 1 + loop_240: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + if( .not.swtch ) then + if( orgati ) then + c = w - delta( ip1 )*dw -( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + else + temp = z( ii ) / delta( ii ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - delta( i )*dpsi - delta( ip1 )*dphi + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + + else + a = z( ip1 )*z( ip1 ) +delta( i )*delta( i )*( dpsi+dphi ) + end if + else + a = delta( i )*delta( i )*dpsi +delta( ip1 )*delta( ip1 )& + *dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - delta( iim1 )*dpsi - delta( iip1 )*dphi + zz( 1 ) = delta( iim1 )*delta( iim1 )*dpsi + zz( 3 ) = delta( iip1 )*delta( iip1 )*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )& + *temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )& + *temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + end if + call stdlib_dlaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempzero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_240 + ! return with info = 1, niter = maxit and not converged + info = 1 + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + end if + 250 continue + return + end subroutine stdlib_dlaed4 + + !> DLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_dlaed8( icompq, k, n, qsiz, d, q, ldq, indxq, rho,cutpnt, z, dlamda, & + q2, ldq2, w, perm, givptr,givcol, givnum, indxp, indx, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, icompq, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(inout) :: d(*), q(ldq,*), z(*) + real(dp), intent(out) :: dlamda(*), givnum(2,*), q2(ldq2,*), w(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: mone = -1.0_dp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(dp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -3 + else if( icompq==1 .and. qsizn ) then + info = -10 + else if( ldq2n )go to 100 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_dlapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + if( icompq==1 ) then + call stdlib_drot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j & + ) ) ), 1, c, s ) + end if + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( jlam ) DLAED9: finds the roots of the secular equation, as defined by the + !> values in D, Z, and RHO, between KSTART and KSTOP. It makes the + !> appropriate calls to DLAED4 and then stores the new matrix of + !> eigenvectors for use in calculating the next level of Z vectors. + + pure subroutine stdlib_dlaed9( k, kstart, kstop, n, d, q, ldq, rho, dlamda, w,s, lds, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, kstart, kstop, ldq, lds, n + real(dp), intent(in) :: rho + ! Array Arguments + real(dp), intent(out) :: d(*), q(ldq,*), s(lds,*) + real(dp), intent(inout) :: dlamda(*), w(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( kstart<1 .or. kstart>max( 1, k ) ) then + info = -2 + else if( max( 1, kstop )max( 1, k ) )then + info = -3 + else if( n DLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg + !> matrix H. + + pure subroutine stdlib_dlaein( rightv, noinit, n, h, ldh, wr, wi, vr, vi, b,ldb, work, eps3, & + smlnum, bignum, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(dp), intent(in) :: bignum, eps3, smlnum, wi, wr + ! Array Arguments + real(dp), intent(out) :: b(ldb,*), work(*) + real(dp), intent(in) :: h(ldh,*) + real(dp), intent(inout) :: vi(*), vr(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: tenth = 1.0e-1_dp + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, i1, i2, i3, ierr, its, j + real(dp) :: absbii, absbjj, ei, ej, growto, norm, nrmsml, rec, rootn, scale, temp, & + vcrit, vmax, vnorm, w, w1, x, xi, xr, y + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=dp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - (wr,wi)*i (except that the subdiagonal elements and + ! the imaginary parts of the diagonal elements are not stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - wr + end do + if( wi==zero ) then + ! real eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_dnrm2( n, vr, 1 ) + call stdlib_dscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), vr,1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( abs( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + temp = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + do i = 2, n + vr( i ) = temp + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_idamax( n, vr, 1 ) + call stdlib_dscal( n, one / abs( vr( i ) ), vr, 1 ) + else + ! complex eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + vi( i ) = zero + end do + else + ! scale supplied initial vector. + norm = stdlib_dlapy2( stdlib_dnrm2( n, vr, 1 ), stdlib_dnrm2( n, vi, 1 ) ) + + rec = ( eps3*rootn ) / max( norm, nrmsml ) + call stdlib_dscal( n, rec, vr, 1 ) + call stdlib_dscal( n, rec, vi, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + ! the imaginary part of the (i,j)-th element of u is stored in + ! b(j+1,i). + b( 2, 1 ) = -wi + do i = 2, n + b( i+1, 1 ) = zero + end do + loop_170: do i = 1, n - 1 + absbii = stdlib_dlapy2( b( i, i ), b( i+1, i ) ) + ei = h( i+1, i ) + if( absbiivcrit ) then + rec = one / vmax + call stdlib_dscal( n, rec, vr, 1 ) + call stdlib_dscal( n, rec, vi, 1 ) + scale = scale*rec + vmax = one + vcrit = bignum + end if + xr = vr( i ) + xi = vi( i ) + if( rightv ) then + do j = i + 1, n + xr = xr - b( i, j )*vr( j ) + b( j+1, i )*vi( j ) + xi = xi - b( i, j )*vi( j ) - b( j+1, i )*vr( j ) + end do + else + do j = 1, i - 1 + xr = xr - b( j, i )*vr( j ) + b( i+1, j )*vi( j ) + xi = xi - b( j, i )*vi( j ) - b( i+1, j )*vr( j ) + end do + end if + w = abs( b( i, i ) ) + abs( b( i+1, i ) ) + if( w>smlnum ) then + if( ww*bignum ) then + rec = one / w1 + call stdlib_dscal( n, rec, vr, 1 ) + call stdlib_dscal( n, rec, vi, 1 ) + xr = vr( i ) + xi = vi( i ) + scale = scale*rec + vmax = vmax*rec + end if + end if + ! divide by diagonal element of b. + call stdlib_dladiv( xr, xi, b( i, i ), b( i+1, i ), vr( i ),vi( i ) ) + + vmax = max( abs( vr( i ) )+abs( vi( i ) ), vmax ) + vcrit = bignum / vmax + else + do j = 1, n + vr( j ) = zero + vi( j ) = zero + end do + vr( i ) = one + vi( i ) = one + scale = zero + vmax = one + vcrit = bignum + end if + end do loop_250 + ! test for sufficient growth in the norm of (vr,vi). + vnorm = stdlib_dasum( n, vr, 1 ) + stdlib_dasum( n, vi, 1 ) + if( vnorm>=growto*scale )go to 280 + ! choose a new orthogonal starting vector and try again. + y = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + vi( 1 ) = zero + do i = 2, n + vr( i ) = y + vi( i ) = zero + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do loop_270 + ! failure to find eigenvector in n iterations + info = 1 + 280 continue + ! normalize eigenvector. + vnorm = zero + do i = 1, n + vnorm = max( vnorm, abs( vr( i ) )+abs( vi( i ) ) ) + end do + call stdlib_dscal( n, one / vnorm, vr, 1 ) + call stdlib_dscal( n, one / vnorm, vi, 1 ) + end if + return + end subroutine stdlib_dlaein + + !> DLAGV2: computes the Generalized Schur factorization of a real 2-by-2 + !> matrix pencil (A,B) where B is upper triangular. This routine + !> computes orthogonal (rotation) matrices given by CSL, SNL and CSR, + !> SNR such that + !> 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 + !> types), then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], + !> 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, + !> then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] + !> where b11 >= b22 > 0. + + pure subroutine stdlib_dlagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(dp), intent(out) :: csl, csr, snl, snr + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(2), alphar(2), beta(2) + ! ===================================================================== + + ! Local Scalars + real(dp) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, & + scale2, t, ulp, wi, wr1, wr2 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + safmin = stdlib_dlamch( 'S' ) + ulp = stdlib_dlamch( 'P' ) + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a( 1, 1 ) = ascale*a( 1, 1 ) + a( 1, 2 ) = ascale*a( 1, 2 ) + a( 2, 1 ) = ascale*a( 2, 1 ) + a( 2, 2 ) = ascale*a( 2, 2 ) + ! scale b + bnorm = max( abs( b( 1, 1 ) ), abs( b( 1, 2 ) )+abs( b( 2, 2 ) ),safmin ) + bscale = one / bnorm + b( 1, 1 ) = bscale*b( 1, 1 ) + b( 1, 2 ) = bscale*b( 1, 2 ) + b( 2, 2 ) = bscale*b( 2, 2 ) + ! check if a can be deflated + if( abs( a( 2, 1 ) )<=ulp ) then + csl = one + snl = zero + csr = one + snr = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + ! check if b is singular + else if( abs( b( 1, 1 ) )<=ulp ) then + call stdlib_dlartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + csr = one + snr = zero + call stdlib_drot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_drot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 1, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + else if( abs( b( 2, 2 ) )<=ulp ) then + call stdlib_dlartg( a( 2, 2 ), a( 2, 1 ), csr, snr, t ) + snr = -snr + call stdlib_drot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_drot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + csl = one + snl = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + b( 2, 2 ) = zero + wi = zero + else + ! b is nonsingular, first compute the eigenvalues of (a,b) + call stdlib_dlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi ) + if( wi==zero ) then + ! two real eigenvalues, compute s*a-w*b + h1 = scale1*a( 1, 1 ) - wr1*b( 1, 1 ) + h2 = scale1*a( 1, 2 ) - wr1*b( 1, 2 ) + h3 = scale1*a( 2, 2 ) - wr1*b( 2, 2 ) + rr = stdlib_dlapy2( h1, h2 ) + qq = stdlib_dlapy2( scale1*a( 2, 1 ), h3 ) + if( rr>qq ) then + ! find right rotation matrix to zero 1,1 element of + ! (sa - wb) + call stdlib_dlartg( h2, h1, csr, snr, t ) + else + ! find right rotation matrix to zero 2,1 element of + ! (sa - wb) + call stdlib_dlartg( h3, scale1*a( 2, 1 ), csr, snr, t ) + end if + snr = -snr + call stdlib_drot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_drot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + ! compute inf norms of a and b + h1 = max( abs( a( 1, 1 ) )+abs( a( 1, 2 ) ),abs( a( 2, 1 ) )+abs( a( 2, 2 ) ) ) + + h2 = max( abs( b( 1, 1 ) )+abs( b( 1, 2 ) ),abs( b( 2, 1 ) )+abs( b( 2, 2 ) ) ) + + if( ( scale1*h1 )>=abs( wr1 )*h2 ) then + ! find left rotation matrix q to zero out b(2,1) + call stdlib_dlartg( b( 1, 1 ), b( 2, 1 ), csl, snl, r ) + else + ! find left rotation matrix q to zero out a(2,1) + call stdlib_dlartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + end if + call stdlib_drot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_drot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 2, 1 ) = zero + else + ! a pair of complex conjugate eigenvalues + ! first compute the svd of the matrix b + call stdlib_dlasv2( b( 1, 1 ), b( 1, 2 ), b( 2, 2 ), r, t, snr,csr, snl, csl ) + + ! form (a,b) := q(a,b)z**t where q is left rotation matrix and + ! z is right rotation matrix computed from stdlib_dlasv2 + call stdlib_drot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_drot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + call stdlib_drot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_drot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + b( 2, 1 ) = zero + b( 1, 2 ) = zero + end if + end if + ! unscaling + a( 1, 1 ) = anorm*a( 1, 1 ) + a( 2, 1 ) = anorm*a( 2, 1 ) + a( 1, 2 ) = anorm*a( 1, 2 ) + a( 2, 2 ) = anorm*a( 2, 2 ) + b( 1, 1 ) = bnorm*b( 1, 1 ) + b( 2, 1 ) = bnorm*b( 2, 1 ) + b( 1, 2 ) = bnorm*b( 1, 2 ) + b( 2, 2 ) = bnorm*b( 2, 2 ) + if( wi==zero ) then + alphar( 1 ) = a( 1, 1 ) + alphar( 2 ) = a( 2, 2 ) + alphai( 1 ) = zero + alphai( 2 ) = zero + beta( 1 ) = b( 1, 1 ) + beta( 2 ) = b( 2, 2 ) + else + alphar( 1 ) = anorm*wr1 / scale1 / bnorm + alphai( 1 ) = anorm*wi / scale1 / bnorm + alphar( 2 ) = alphar( 1 ) + alphai( 2 ) = -alphai( 1 ) + beta( 1 ) = one + beta( 2 ) = one + end if + return + end subroutine stdlib_dlagv2 + + !> DLAHR2: reduces the first NB columns of A real general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an orthogonal similarity transformation + !> Q**T * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by DGEHRD. + + pure subroutine stdlib_dlahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**t + call stdlib_dgemv( 'NO TRANSPOSE', n-k, i-1, -one, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, one, a( k+1, i ), 1 ) + ! apply i - v * t**t * v**t to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**t * b1 + call stdlib_dcopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_dtrmv( 'LOWER', 'TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, t( 1, nb ),& + 1 ) + ! w := w + v2**t * b2 + call stdlib_dgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ),lda, a( k+i, i ), & + 1, one, t( 1, nb ), 1 ) + ! w := t**t * w + call stdlib_dtrmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, nb ), 1 ) + + ! b2 := b2 - v2*w + call stdlib_dgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -one,a( k+i, 1 ),lda, t( 1, nb )& + , 1, one, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_dtrmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_daxpy( i-1, -one, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_dlarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = one + ! compute y(k+1:n,i) + call stdlib_dgemv( 'NO TRANSPOSE', n-k, n-k-i+1,one, a( k+1, i+1 ),lda, a( k+i, i ),& + 1, zero, y( k+1, i ), 1 ) + call stdlib_dgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ), lda,a( k+i, i ), 1, & + zero, t( 1, i ), 1 ) + call stdlib_dgemv( 'NO TRANSPOSE', n-k, i-1, -one,y( k+1, 1 ), ldy,t( 1, i ), 1, & + one, y( k+1, i ), 1 ) + call stdlib_dscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_dscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_dtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_dlacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,one, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, one,a( 1, & + 2+nb ), lda, a( k+1+nb, 1 ), lda, one, y,ldy ) + call stdlib_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,one, t, ldt, y, & + ldy ) + return + end subroutine stdlib_dlahr2 + + !> DLALN2: solves a system of the form (ca A - w D ) X = s B + !> or (ca A**T - w D) X = s B with possible scaling ("s") and + !> perturbation of A. (A**T means A-transpose.) + !> A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + !> real diagonal matrix, w is a real or complex value, and X and B are + !> NA x 1 matrices -- real if w is real, complex if w is complex. NA + !> may be 1 or 2. + !> If w is complex, X and B are represented as NA x 2 matrices, + !> the first column of each being the real part and the second + !> being the imaginary part. + !> "s" is a scaling factor (<= 1), computed by DLALN2, which is + !> so chosen that X can be computed without overflow. X is further + !> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + !> than overflow. + !> If both singular values of (ca A - w D) are less than SMIN, + !> SMIN*identity will be used instead of (ca A - w D). If only one + !> singular value is less than SMIN, one element of (ca A - w D) will be + !> perturbed enough to make the smallest singular value roughly SMIN. + !> If both singular values are at least SMIN, (ca A - w D) will not be + !> perturbed. In any case, the perturbation will be at most some small + !> multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + !> are computed by infinity-norm approximations, and thus will only be + !> correct to a factor of 2 or so. + !> Note: all input quantities are assumed to be smaller than overflow + !> by a reasonable factor. (See BIGNUM.) + + pure subroutine stdlib_dlaln2( ltrans, na, nw, smin, ca, a, lda, d1, d2, b,ldb, wr, wi, x, & + ldx, scale, xnorm, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ltrans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, na, nw + real(dp), intent(in) :: ca, d1, d2, smin, wi, wr + real(dp), intent(out) :: scale, xnorm + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: icmax, j + real(dp) :: bbnd, bi1, bi2, bignum, bnorm, br1, br2, ci21, ci22, cmax, cnorm, cr21, & + cr22, csi, csr, li21, lr21, smini, smlnum, temp, u22abs, ui11, ui11r, ui12, ui12s, & + ui22, ur11, ur11r, ur12, ur12s, ur22, xi1, xi2, xr1, xr2 + ! Local Arrays + logical(lk) :: rswap(4), zswap(4) + integer(ilp) :: ipivot(4,4) + real(dp) :: ci(2,2), civ(4), cr(2,2), crv(4) + ! Intrinsic Functions + intrinsic :: abs,max + ! Equivalences + equivalence ( ci( 1, 1 ), civ( 1 ) ),( cr( 1, 1 ), crv( 1 ) ) + ! Data Statements + zswap = [.false.,.false.,.true.,.true.] + rswap = [.false.,.true.,.false.,.true.] + ipivot = reshape([1,2,3,4,2,1,4,3,3,4,1,2,4,3,2,1],[4,4]) + ! Executable Statements + ! compute bignum + smlnum = two*stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + smini = max( smin, smlnum ) + ! don't check for input errors + info = 0 + ! standard initializations + scale = one + if( na==1 ) then + ! 1 x 1 (i.e., scalar) system c x = b + if( nw==1 ) then + ! real 1x1 system. + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + cnorm = abs( csr ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + x( 1, 1 ) = ( b( 1, 1 )*scale ) / csr + xnorm = abs( x( 1, 1 ) ) + else + ! complex 1x1 system (w is complex) + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + csi = -wi*d1 + cnorm = abs( csr ) + abs( csi ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + call stdlib_dladiv( scale*b( 1, 1 ), scale*b( 1, 2 ), csr, csi,x( 1, 1 ), x( 1, & + 2 ) ) + xnorm = abs( x( 1, 1 ) ) + abs( x( 1, 2 ) ) + end if + else + ! 2x2 system + ! compute the realpart of c = ca a - w d (or ca a**t - w d,KIND=dp) + cr( 1, 1 ) = ca*a( 1, 1 ) - wr*d1 + cr( 2, 2 ) = ca*a( 2, 2 ) - wr*d2 + if( ltrans ) then + cr( 1, 2 ) = ca*a( 2, 1 ) + cr( 2, 1 ) = ca*a( 1, 2 ) + else + cr( 2, 1 ) = ca*a( 2, 1 ) + cr( 1, 2 ) = ca*a( 1, 2 ) + end if + if( nw==1 ) then + ! real2x2 system (w is real,KIND=dp) + ! find the largest element in c + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )>cmax ) then + cmax = abs( crv( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ur11r = one / ur11 + lr21 = ur11r*cr21 + ur22 = cr22 - ur12*lr21 + ! if smaller pivot < smini, use smini + if( abs( ur22 )one .and. abs( ur22 )=bignum*abs( ur22 ) )scale = one / bbnd + end if + xr2 = ( br2*scale ) / ur22 + xr1 = ( scale*br1 )*ur11r - xr2*( ur11r*ur12 ) + if( zswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + end if + xnorm = max( abs( xr1 ), abs( xr2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + else + ! complex 2x2 system (w is complex) + ! find the largest element in c + ci( 1, 1 ) = -wi*d1 + ci( 2, 1 ) = zero + ci( 1, 2 ) = zero + ci( 2, 2 ) = -wi*d2 + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )+abs( civ( j ) )>cmax ) then + cmax = abs( crv( j ) ) + abs( civ( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + x( 1, 2 ) = temp*b( 1, 2 ) + x( 2, 2 ) = temp*b( 2, 2 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + ui11 = civ( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ci21 = civ( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + ui12 = civ( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ci22 = civ( ipivot( 4, icmax ) ) + if( icmax==1 .or. icmax==4 ) then + ! code when off-diagonals of pivoted c are real + if( abs( ur11 )>abs( ui11 ) ) then + temp = ui11 / ur11 + ur11r = one / ( ur11*( one+temp**2 ) ) + ui11r = -temp*ur11r + else + temp = ur11 / ui11 + ui11r = -one / ( ui11*( one+temp**2 ) ) + ur11r = -temp*ui11r + end if + lr21 = cr21*ur11r + li21 = cr21*ui11r + ur12s = ur12*ur11r + ui12s = ur12*ui11r + ur22 = cr22 - ur12*lr21 + ui22 = ci22 - ur12*li21 + else + ! code when diagonals of pivoted c are real + ur11r = one / ur11 + ui11r = zero + lr21 = cr21*ur11r + li21 = ci21*ur11r + ur12s = ur12*ur11r + ui12s = ui12*ur11r + ur22 = cr22 - ur12*lr21 + ui12*li21 + ui22 = -ur12*li21 - ui12*lr21 + end if + u22abs = abs( ur22 ) + abs( ui22 ) + ! if smaller pivot < smini, use smini + if( u22absone .and. u22abs=bignum*u22abs ) then + scale = one / bbnd + br1 = scale*br1 + bi1 = scale*bi1 + br2 = scale*br2 + bi2 = scale*bi2 + end if + end if + call stdlib_dladiv( br2, bi2, ur22, ui22, xr2, xi2 ) + xr1 = ur11r*br1 - ui11r*bi1 - ur12s*xr2 + ui12s*xi2 + xi1 = ui11r*br1 + ur11r*bi1 - ui12s*xr2 - ur12s*xi2 + if( zswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + x( 1, 2 ) = xi2 + x( 2, 2 ) = xi1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + x( 1, 2 ) = xi1 + x( 2, 2 ) = xi2 + end if + xnorm = max( abs( xr1 )+abs( xi1 ), abs( xr2 )+abs( xi2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + x( 1, 2 ) = temp*x( 1, 2 ) + x( 2, 2 ) = temp*x( 2, 2 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + end if + end if + return + end subroutine stdlib_dlaln2 + + !> DLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_dlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(dp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: bx(ldbx,*), work(*) + real(dp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, m, n, nlp1 + real(dp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb DLAMSWLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (DLASWLQ) + + pure subroutine stdlib_dlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(dp), intent(in) :: a(lda,*), t(ldt,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, ctr, lw + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_dgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_dtpmlqt('L','T',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_dtpmlqt('L','T',nb-k , n, k, 0,mb, a(1,i), lda,t(1, ctr*k+1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_dgemlqt('L','T',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_dgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_dtpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_dtpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_dtpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1,ctr *k+1), ldt, & + c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_dtpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1,ctr*k+1), ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_dgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ctr = 1 + ii=n-kk+1 + call stdlib_dgemlqt('R','T',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_dtpmlqt('R','T',m , nb-k, k, 0,mb, a(1,i), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_dtpmlqt('R','T',m , kk, k, 0,mb, a(1,ii), lda,t(1,ctr*k+1),ldt, c(1,1),& + ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_dlamswlq + + !> DLAMTSQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (DLATSQR) + + pure subroutine stdlib_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(dp), intent(in) :: a(lda,*), t(ldt,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = mb * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_dgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_dtpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1,ctr*k+1),ldt , c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_dtpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_dgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_dgemqrt('L','T',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_dtpmqrt('L','T',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_dtpmqrt('L','T',kk , n, k, 0,nb, a(ii,1), lda,t(1,ctr * k + 1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_dtpmqrt('R','T',m , kk, k, 0, nb, a(ii,1), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_dtpmqrt('R','T',m , mb-k, k, 0,nb, a(i,1), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_dgemqrt('R','T',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_dgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_dtpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_dtpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_dlamtsqr + + !> DLANV2: computes the Schur factorization of a real 2-by-2 nonsymmetric + !> matrix in standard form: + !> [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + !> [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + !> where either + !> 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + !> 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + !> conjugate eigenvalues. + + pure subroutine stdlib_dlanv2( a, b, c, d, rt1r, rt1i, rt2r, rt2i, cs, sn ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(inout) :: a, b, c, d + real(dp), intent(out) :: cs, rt1i, rt1r, rt2i, rt2r, sn + ! ===================================================================== + ! Parameters + real(dp), parameter :: multpl = 4.0e+0_dp + + + ! Local Scalars + real(dp) :: aa, bb, bcmax, bcmis, cc, cs1, dd, eps, p, sab, sac, scale, sigma, sn1, & + tau, temp, z, safmin, safmn2, safmx2 + integer(ilp) :: count + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + safmin = stdlib_dlamch( 'S' ) + eps = stdlib_dlamch( 'P' ) + safmn2 = stdlib_dlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_dlamch( 'B' ) ) / & + two,KIND=ilp) + safmx2 = one / safmn2 + if( c==zero ) then + cs = one + sn = zero + else if( b==zero ) then + ! swap rows and columns + cs = zero + sn = one + temp = d + d = a + a = temp + b = -c + c = zero + else if( ( a-d )==zero .and. sign( one, b )/=sign( one, c ) )then + cs = one + sn = zero + else + temp = a - d + p = half*temp + bcmax = max( abs( b ), abs( c ) ) + bcmis = min( abs( b ), abs( c ) )*sign( one, b )*sign( one, c ) + scale = max( abs( p ), bcmax ) + z = ( p / scale )*p + ( bcmax / scale )*bcmis + ! if z is of the order of the machine accuracy, postpone the + ! decision on the nature of eigenvalues + if( z>=multpl*eps ) then + ! real eigenvalues. compute a and d. + z = p + sign( sqrt( scale )*sqrt( z ), p ) + a = d + z + d = d - ( bcmax / z )*bcmis + ! compute b and the rotation matrix + tau = stdlib_dlapy2( c, z ) + cs = z / tau + sn = c / tau + b = b - c + c = zero + else + ! complex eigenvalues, or real(almost,KIND=dp) equal eigenvalues. + ! make diagonal elements equal. + count = 0 + sigma = b + c + 10 continue + count = count + 1 + scale = max( abs(temp), abs(sigma) ) + if( scale>=safmx2 ) then + sigma = sigma * safmn2 + temp = temp * safmn2 + if (count <= 20)goto 10 + end if + if( scale<=safmn2 ) then + sigma = sigma * safmx2 + temp = temp * safmx2 + if (count <= 20)goto 10 + end if + p = half*temp + tau = stdlib_dlapy2( sigma, temp ) + cs = sqrt( half*( one+abs( sigma ) / tau ) ) + sn = -( p / ( tau*cs ) )*sign( one, sigma ) + ! compute [ aa bb ] = [ a b ] [ cs -sn ] + ! [ cc dd ] [ c d ] [ sn cs ] + aa = a*cs + b*sn + bb = -a*sn + b*cs + cc = c*cs + d*sn + dd = -c*sn + d*cs + ! compute [ a b ] = [ cs sn ] [ aa bb ] + ! [ c d ] [-sn cs ] [ cc dd ] + a = aa*cs + cc*sn + b = bb*cs + dd*sn + c = -aa*sn + cc*cs + d = -bb*sn + dd*cs + temp = half*( a+d ) + a = temp + d = temp + if( c/=zero ) then + if( b/=zero ) then + if( sign( one, b )==sign( one, c ) ) then + ! real eigenvalues: reduce to upper triangular form + sab = sqrt( abs( b ) ) + sac = sqrt( abs( c ) ) + p = sign( sab*sac, c ) + tau = one / sqrt( abs( b+c ) ) + a = temp + p + d = temp - p + b = b - c + c = zero + cs1 = sab*tau + sn1 = sac*tau + temp = cs*cs1 - sn*sn1 + sn = cs*sn1 + sn*cs1 + cs = temp + end if + else + b = -c + c = zero + temp = cs + cs = -sn + sn = temp + end if + end if + end if + end if + ! store eigenvalues in (rt1r,rt1i) and (rt2r,rt2i). + rt1r = a + rt2r = d + if( c==zero ) then + rt1i = zero + rt2i = zero + else + rt1i = sqrt( abs( b ) )*sqrt( abs( c ) ) + rt2i = -rt1i + end if + return + end subroutine stdlib_dlanv2 + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_dlapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(dp), intent(out) :: ssmin + ! Array Arguments + real(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: a11, a12, a22, c, ssmax, tau + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_dlarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = one + c = -tau*stdlib_ddot( n, x, incx, y, incy ) + call stdlib_daxpy( n, c, x, incx, y, incy ) + call stdlib_dlarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_dlas2( a11, a12, a22, ssmin, ssmax ) + return + end subroutine stdlib_dlapll + + !> DLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_dlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*), vn1(*), vn2(*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(dp) :: aii, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_dlamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_idamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_dswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi DLAQPS: computes a step of QR factorization with column pivoting + !> of a real M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_dlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*) + real(dp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(dp) :: akk, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,real,max,min,nint,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_dlamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + call stdlib_dgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1 ),lda, f( k, 1 ), & + ldf, one, a( rk, k ), 1 ) + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_dgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1 ),lda, a( rk, k & + ), 1, zero, auxv( 1 ), 1 ) + call stdlib_dgemv( 'NO TRANSPOSE', n, k-1, one, f( 1, 1 ), ldf,auxv( 1 ), 1, one,& + f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_dnrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_snrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_dlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 40 + end if + return + end subroutine stdlib_dlaqps + + !> DLAQR5:, called by DLAQR0, performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & + iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) + real(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(dp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& + tst1, tst2, ulp + integer(ilp) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & + krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,real,max,min,mod + ! Local Arrays + real(dp) :: vt(3) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== shuffle shifts into pairs of real shifts and pairs + ! . of complex conjugate shifts assuming complex + ! . conjugate shifts are already adjacent to one + ! . another. ==== + do i = 1, nshfts - 2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + end if + end do + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by one. the shuffle above + ! . ensures that the dropped shift is real and that + ! . the remaining shifts are paired. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = zero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_dlaset( 'ALL', kdu, kdu, zero, one, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop ) then + if( h( k+1, k )/=zero ) then + tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) + if( tst1==zero ) then + if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then + h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),v( 1, m ) ) + alpha = v( 1, m ) + call stdlib_dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually zero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*v( 2, m ) + h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m ) + ! ==== calculate reflection to move + ! . mth bulge one step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & + then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new one. ==== + call stdlib_dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),vt ) + alpha = vt( 1 ) + call stdlib_dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*h( k+2, k ) ) + if( abs( h( k+2, k )-refsum*vt( 2 ) )+abs( refsum*vt( 3 ) )>ulp*( abs( & + h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old one with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new one. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = zero + h( k+3, k ) = zero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m ) + h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*h( k+2, k+1 )+v( 3, m )*h( k+3, & + k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is zero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*h( k+2, j )+v( 3, m )*h( k+3, j & + ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m ) + u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m ) + z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & + jcol ), ldh, zero, wh,ldwh ) + call stdlib_dlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_dgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_dlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_dgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_dlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_dlaqr5 + + !> DLAQTR: solves the real quasi-triangular system + !> op(T)*p = scale*c, if LREAL = .TRUE. + !> or the complex quasi-triangular systems + !> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. + !> in real arithmetic, where T is upper quasi-triangular. + !> If LREAL = .FALSE., then the first diagonal block of T must be + !> 1 by 1, B is the specially structured matrix + !> B = [ b(1) b(2) ... b(n) ] + !> [ w ] + !> [ w ] + !> [ . ] + !> [ w ] + !> op(A) = A or A**T, A**T denotes the transpose of + !> matrix A. + !> On input, X = [ c ]. On output, X = [ p ]. + !> [ d ] [ q ] + !> This subroutine is designed for the condition number estimation + !> in routine DTRSNA. + + subroutine stdlib_dlaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: lreal, ltran + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldt, n + real(dp), intent(out) :: scale + real(dp), intent(in) :: w + ! Array Arguments + real(dp), intent(in) :: b(*), t(ldt,*) + real(dp), intent(out) :: work(*) + real(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, j1, j2, jnext, k, n1, n2 + real(dp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & + xnorm, z + ! Local Arrays + real(dp) :: d(2,2), v(2,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! do not test the input parameters for errors + notran = .not.ltran + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + bignum = one / smlnum + xnorm = stdlib_dlange( 'M', n, n, t, ldt, d ) + if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib_dlange( 'M', n, 1, b, n, d ) ) + + smin = max( smlnum, eps*xnorm ) + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = stdlib_dasum( j-1, t( 1, j ), 1 ) + end do + if( .not.lreal ) then + do i = 2, n + work( i ) = work( i ) + abs( b( i ) ) + end do + end if + n2 = 2*n + n1 = n + if( .not.lreal )n1 = n2 + k = stdlib_idamax( n1, x, 1 ) + xmax = abs( x( k ) ) + scale = one + if( xmax>bignum ) then + scale = bignum / xmax + call stdlib_dscal( n1, scale, x, 1 ) + xmax = bignum + end if + if( lreal ) then + if( notran ) then + ! solve t*p = scale*c + jnext = n + loop_30: do j = n, 1, -1 + if( j>jnext )cycle loop_30 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! meet 1 by 1 diagonal block + ! scale to avoid overflow when computing + ! x(j) = b(j)/t(j,j) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xj = abs( x( j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_daxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + k = stdlib_idamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + else + ! meet 2 by 2 diagonal block + ! call 2 by 2 linear system solve, to take + ! care of possible overflow by scaling factor. + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + call stdlib_dlaln2( .false., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_dscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) + ! to avoid overflow in updating right-hand side. + xj = max( abs( v( 1, 1 ) ), abs( v( 2, 1 ) ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update right-hand side + if( j1>1 ) then + call stdlib_daxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_daxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + k = stdlib_idamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + end if + end do loop_30 + else + ! solve t**t*p = scale*c + jnext = 1 + loop_40: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1, x, 1 ) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xmax = max( xmax, abs( x( j1 ) ) ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side elements by inner product. + xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) + if( xmax>one ) then + rec = one / xmax + if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then + call stdlib_dscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_ddot( j1-1, t( 1, j2 ), 1, x,1 ) + call stdlib_dlaln2( .true., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d, & + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_dscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) + end if + end do loop_40 + end if + else + sminw = max( eps*abs( w ), smin ) + if( notran ) then + ! solve (t + ib)*(p+iq) = c+id + jnext = n + loop_70: do j = n, 1, -1 + if( j>jnext )cycle loop_70 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! 1 by 1 diagonal block + ! scale if necessary to avoid overflow in division + z = w + if( j1==1 )z = b( 1 ) + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_dladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) + x( j1 ) = sr + x( n+j1 ) = si + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_daxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_daxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) + end do + end if + else + ! meet 2 by 2 diagonal block + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + d( 1, 2 ) = x( n+j1 ) + d( 2, 2 ) = x( n+j2 ) + call stdlib_dlaln2( .false., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, & + d, 2, zero, -w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_dscal( 2*n, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + ! scale x(j1), .... to avoid overflow in + ! updating right hand side. + xj = max( abs( v( 1, 1 ) )+abs( v( 1, 2 ) ),abs( v( 2, 1 ) )+abs( v( 2, 2 )& + ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update the right-hand side. + if( j1>1 ) then + call stdlib_daxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_daxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + call stdlib_daxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + call stdlib_daxpy( j1-1, -x( n+j2 ), t( 1, j2 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) + end do + end if + end if + end do loop_70 + else + ! solve (t + ib)**t*(p+iq) = c+id + jnext = 1 + loop_80: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1, x, 1 ) + x( n+j1 ) = x( n+j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + if( j1>1 ) then + x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) + x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1 ) + end if + xj = abs( x( j1 ) ) + abs( x( j1+n ) ) + z = w + if( j1==1 )z = b( 1 ) + ! scale if necessary to avoid overflow in + ! complex division + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_dladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) + x( j1 ) = sr + x( j1+n ) = si + xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side element by inner product. + xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) + + if( xmax>one ) then + rec = one / xmax + if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then + call stdlib_dscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_ddot( j1-1, t( 1, j2 ), 1, x,1 ) + d( 1, 2 ) = x( n+j1 ) - stdlib_ddot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + d( 2, 2 ) = x( n+j2 ) - stdlib_ddot( j1-1, t( 1, j2 ), 1,x( n+1 ), 1 ) + + d( 1, 1 ) = d( 1, 1 ) - b( j1 )*x( n+1 ) + d( 2, 1 ) = d( 2, 1 ) - b( j2 )*x( n+1 ) + d( 1, 2 ) = d( 1, 2 ) + b( j1 )*x( 1 ) + d( 2, 2 ) = d( 2, 2 ) + b( j2 )*x( 1 ) + call stdlib_dlaln2( .true., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_dscal( n2, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& + , xmax ) + end if + end do loop_80 + end if + end if + return + end subroutine stdlib_dlaqtr + + !> DLASD3: finds all the square roots of the roots of the secular + !> equation, as defined by the values in D and Z. It makes the + !> appropriate calls to DLASD4 and then updates the singular + !> vectors by matrix multiplication. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> DLASD3 is called from DLASD1. + + pure subroutine stdlib_dlasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& + vt2, ldvt2, idxc, ctot, z,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), idxc(*) + real(dp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) + real(dp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) + real(dp), intent(in) :: u2(ldu2,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 + real(dp) :: rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + nlp1 = nl + 1 + nlp2 = nl + 2 + if( ( k<1 ) .or. ( k>n ) ) then + info = -4 + else if( ldqzero ) then + call stdlib_dcopy( n, u2( 1, 1 ), 1, u( 1, 1 ), 1 ) + else + do i = 1, n + u( i, 1 ) = -u2( i, 1 ) + end do + end if + return + end if + ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can + ! be computed with high relative accuracy (barring over/underflow). + ! this is a problem on machines without a guard digit in + ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). + ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), + ! which on any of these machines zeros out the bottommost + ! bit of dsigma(i) if it is 1; this makes the subsequent + ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation + ! occurs. on binary machines with a guard digit (almost all + ! machines) it does not change dsigma(i) at all. on hexadecimal + ! and decimal machines with a guard digit, it slightly + ! changes the bottommost bits of dsigma(i). it does not account + ! for hexadecimal or decimal machines without guard digits + ! (we know of none). we use a subroutine call to compute + ! 2*dsigma(i) to prevent optimizing compilers from eliminating + ! this code. + do i = 1, k + dsigma( i ) = stdlib_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) + end do + ! keep a copy of z. + call stdlib_dcopy( k, z, 1, q, 1 ) + ! normalize z. + rho = stdlib_dnrm2( k, z, 1 ) + call stdlib_dlascl( 'G', 0, 0, rho, one, k, 1, z, k, info ) + rho = rho*rho + ! find the new singular values. + do j = 1, k + call stdlib_dlasd4( k, j, dsigma, z, u( 1, j ), rho, d( j ),vt( 1, j ), info ) + + ! if the zero finder fails, report the convergence failure. + if( info/=0 ) then + return + end if + end do + ! compute updated z. + do i = 1, k + z( i ) = u( i, k )*vt( i, k ) + do j = 1, i - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & + )+dsigma( j ) ) ) + end do + do j = i, k - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & + i )+dsigma( j+1 ) ) ) + end do + z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1 ) ) + end do + ! compute left singular vectors of the modified diagonal matrix, + ! and store related information for the right singular vectors. + do i = 1, k + vt( 1, i ) = z( 1 ) / u( 1, i ) / vt( 1, i ) + u( 1, i ) = negone + do j = 2, k + vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) + u( j, i ) = dsigma( j )*vt( j, i ) + end do + temp = stdlib_dnrm2( k, u( 1, i ), 1 ) + q( 1, i ) = u( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( j, i ) = u( jc, i ) / temp + end do + end do + ! update the left singular vector matrix. + if( k==2 ) then + call stdlib_dgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) + go to 100 + end if + if( ctot( 1 )>0 ) then + call stdlib_dgemm( 'N', 'N', nl, k, ctot( 1 ), one, u2( 1, 2 ), ldu2,q( 2, 1 ), ldq,& + zero, u( 1, 1 ), ldu ) + if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_dgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( & + ktemp, 1 ), ldq, one, u( 1, 1 ), ldu ) + end if + else if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_dgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( ktemp, & + 1 ), ldq, zero, u( 1, 1 ), ldu ) + else + call stdlib_dlacpy( 'F', nl, k, u2, ldu2, u, ldu ) + end if + call stdlib_dcopy( k, q( 1, 1 ), ldq, u( nlp1, 1 ), ldu ) + ktemp = 2 + ctot( 1 ) + ctemp = ctot( 2 ) + ctot( 3 ) + call stdlib_dgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1 ), & + ldq, zero, u( nlp2, 1 ), ldu ) + ! generate the right singular vectors. + 100 continue + do i = 1, k + temp = stdlib_dnrm2( k, vt( 1, i ), 1 ) + q( i, 1 ) = vt( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( i, j ) = vt( jc, i ) / temp + end do + end do + ! update the right singular vector matrix. + if( k==2 ) then + call stdlib_dgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) + + return + end if + ktemp = 1 + ctot( 1 ) + call stdlib_dgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1, 1 ), ldq,vt2( 1, 1 ), ldvt2, & + zero, vt( 1, 1 ), ldvt ) + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + if( ktemp<=ldvt2 )call stdlib_dgemm( 'N', 'N', k, nlp1, ctot( 3 ), one, q( 1, ktemp ),& + ldq, vt2( ktemp, 1 ), ldvt2, one, vt( 1, 1 ),ldvt ) + ktemp = ctot( 1 ) + 1 + nrp1 = nr + sqre + if( ktemp>1 ) then + do i = 1, k + q( i, ktemp ) = q( i, 1 ) + end do + do i = nlp2, m + vt2( ktemp, i ) = vt2( 1, i ) + end do + end if + ctemp = 1 + ctot( 2 ) + ctot( 3 ) + call stdlib_dgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1, ktemp ), ldq,vt2( ktemp, nlp2 )& + , ldvt2, zero, vt( 1, nlp2 ), ldvt ) + return + end subroutine stdlib_dlasd3 + + !> DLASD6: computes the SVD of an updated upper bidiagonal matrix B + !> obtained by merging two smaller ones by appending a row. This + !> routine is used only for the problem which requires all singular + !> values and optionally singular vector matrices in factored form. + !> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + !> A related subroutine, DLASD1, handles the case in which all singular + !> values and singular vectors of the bidiagonal matrix are desired. + !> DLASD6 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The singular values of B can be computed using D1, D2, the first + !> components of all the right singular vectors of the lower block, and + !> the last components of all the right singular vectors of the upper + !> block. These components are stored and updated in VF and VL, + !> respectively, in DLASD6. Hence U and VT are not explicitly + !> referenced. + !> The singular values are stored in D. The algorithm consists of two + !> stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or if there is a zero + !> in the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD7. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the roots of the + !> secular equation via the routine DLASD4 (as called by DLASD8). + !> This routine also updates VF and VL and computes the distances + !> between the updated singular values and the old singular + !> values. + !> DLASD6 is called from DLASDA. + + pure subroutine stdlib_dlasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & + givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(dp), intent(inout) :: alpha, beta + real(dp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(dp), intent(inout) :: d(*), vf(*), vl(*) + real(dp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & + z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 + real(dp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcolorgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! sort and deflate singular values. + call stdlib_dlasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & + work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & + givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) + ! solve secular equation, compute difl, difr, and update vf, vl. + call stdlib_dlasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & + iw ), info ) + ! report the possible convergence failure. + if( info/=0 ) then + return + end if + ! save the poles if icompq = 1. + if( icompq==1 ) then + call stdlib_dcopy( k, d, 1, poles( 1, 1 ), 1 ) + call stdlib_dcopy( k, work( isigma ), 1, poles( 1, 2 ), 1 ) + end if + ! unscale. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_dlamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_dlasd6 + + !> DOPGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> DSPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_dopgtr( uplo, n, ap, tau, q, ldq, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, n + ! Array Arguments + real(dp), intent(in) :: ap(*), tau(*) + real(dp), intent(out) :: q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, ij, j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldq1 ) then + ! generate q(2:n,2:n) + call stdlib_dorg2r( n-1, n-1, n-1, q( 2, 2 ), ldq, tau, work,iinfo ) + end if + end if + return + end subroutine stdlib_dopgtr + + !> DOPMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by DSPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_dopmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, m, n + ! Array Arguments + real(dp), intent(inout) :: ap(*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: forwrd, left, notran, upper + integer(ilp) :: i, i1, i2, i3, ic, ii, jc, mi, ni, nq + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldc DORBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines DORBDB2, DORBDB3, and DORBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_dorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + real(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < q .or. m-p < q ) then + info = -2 + else if( q < 0 .or. m-q < q ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-2 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORBDB1', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., q of x11 and x21 + do i = 1, q + call stdlib_dlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + call stdlib_dlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + theta(i) = atan2( x21(i,i), x11(i,i) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i) = one + x21(i,i) = one + call stdlib_dlarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + call stdlib_dlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + if( i < q ) then + call stdlib_drot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c, s ) + call stdlib_dlarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) + s = x21(i,i+1) + x21(i,i+1) = one + call stdlib_dlarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & + ldx11, work(ilarf) ) + call stdlib_dlarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & + ldx21, work(ilarf) ) + c = sqrt( stdlib_dnrm2( p-i, x11(i+1,i+1), 1 )**2+ stdlib_dnrm2( m-p-i, x21(i+1,& + i+1), 1 )**2 ) + phi(i) = atan2( s, c ) + call stdlib_dorbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1,x21(i+1,i+1), 1, x11(i+1,& + i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) + end if + end do + return + end subroutine stdlib_dorbdb1 + + !> DORBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines DORBDB1, DORBDB3, and DORBDB4 handle cases in + !> which P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_dorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + real(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < 0 .or. p > m-p ) then + info = -2 + else if( q < 0 .or. q < p .or. m-q < p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORBDB2', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., p of x11 and x21 + do i = 1, p + if( i > 1 ) then + call stdlib_drot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c, s ) + end if + call stdlib_dlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + c = x11(i,i) + x11(i,i) = one + call stdlib_dlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_dlarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & + work(ilarf) ) + s = sqrt( stdlib_dnrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_dnrm2( m-p-i+1, x21(i,i), 1 & + )**2 ) + theta(i) = atan2( s, c ) + call stdlib_dorbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1, x21(i,i), 1,x11(i+1,i+1), & + ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_dscal( p-i, negone, x11(i+1,i), 1 ) + call stdlib_dlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + if( i < p ) then + call stdlib_dlarfgp( p-i, x11(i+1,i), x11(i+2,i), 1, taup1(i) ) + phi(i) = atan2( x11(i+1,i), x21(i,i) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x11(i+1,i) = one + call stdlib_dlarf( 'L', p-i, q-i, x11(i+1,i), 1, taup1(i),x11(i+1,i+1), ldx11, & + work(ilarf) ) + end if + x21(i,i) = one + call stdlib_dlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + end do + ! reduce the bottom-right portion of x21 to the identity matrix + do i = p + 1, q + call stdlib_dlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + x21(i,i) = one + call stdlib_dlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + end do + return + end subroutine stdlib_dorbdb2 + + !> DORBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines DORBDB1, DORBDB2, and DORBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_dorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + real(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( 2*p < m .or. p > m ) then + info = -2 + else if( q < m-p .or. m-q < m-p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORBDB3', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., m-p of x11 and x21 + do i = 1, m-p + if( i > 1 ) then + call stdlib_drot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c, s ) + end if + call stdlib_dlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + s = x21(i,i) + x21(i,i) = one + call stdlib_dlarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_dlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + c = sqrt( stdlib_dnrm2( p-i+1, x11(i,i), 1 )**2+ stdlib_dnrm2( m-p-i, x21(i+1,i), 1 & + )**2 ) + theta(i) = atan2( s, c ) + call stdlib_dorbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1, x21(i+1,i), 1,x11(i,i+1), & + ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_dlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + if( i < m-p ) then + call stdlib_dlarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1, taup2(i) ) + phi(i) = atan2( x21(i+1,i), x11(i,i) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x21(i+1,i) = one + call stdlib_dlarf( 'L', m-p-i, q-i, x21(i+1,i), 1, taup2(i),x21(i+1,i+1), ldx21, & + work(ilarf) ) + end if + x11(i,i) = one + call stdlib_dlarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + end do + ! reduce the bottom-right portion of x11 to the identity matrix + do i = m-p + 1, q + call stdlib_dlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + x11(i,i) = one + call stdlib_dlarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + end do + return + end subroutine stdlib_dorbdb3 + + !> DORBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines DORBDB1, DORBDB2, and DORBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_dorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + phantom, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + real(dp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) + real(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < m-q .or. m-p < m-q ) then + info = -2 + else if( q < m-q .or. q > m ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( q-1, p-1, m-p-1 ) + iorbdb5 = 2 + lorbdb5 = q + lworkopt = ilarf + llarf - 1 + lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORBDB4', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., m-q of x11 and x21 + do i = 1, m-q + if( i == 1 ) then + do j = 1, m + phantom(j) = zero + end do + call stdlib_dorbdb5( p, m-p, q, phantom(1), 1, phantom(p+1), 1,x11, ldx11, x21, & + ldx21, work(iorbdb5),lorbdb5, childinfo ) + call stdlib_dscal( p, negone, phantom(1), 1 ) + call stdlib_dlarfgp( p, phantom(1), phantom(2), 1, taup1(1) ) + call stdlib_dlarfgp( m-p, phantom(p+1), phantom(p+2), 1, taup2(1) ) + theta(i) = atan2( phantom(1), phantom(p+1) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + phantom(1) = one + phantom(p+1) = one + call stdlib_dlarf( 'L', p, q, phantom(1), 1, taup1(1), x11, ldx11,work(ilarf) ) + + call stdlib_dlarf( 'L', m-p, q, phantom(p+1), 1, taup2(1), x21,ldx21, work(ilarf)& + ) + else + call stdlib_dorbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1,x21(i,i-1), 1, x11(i,i)& + , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) + call stdlib_dscal( p-i+1, negone, x11(i,i-1), 1 ) + call stdlib_dlarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1, taup1(i) ) + call stdlib_dlarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1,taup2(i) ) + theta(i) = atan2( x11(i,i-1), x21(i,i-1) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i-1) = one + x21(i,i-1) = one + call stdlib_dlarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1, taup1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_dlarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1, taup2(i),x21(i,i), ldx21, & + work(ilarf) ) + end if + call stdlib_drot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) + call stdlib_dlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + c = x21(i,i) + x21(i,i) = one + call stdlib_dlarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_dlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + if( i < m-q ) then + s = sqrt( stdlib_dnrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_dnrm2( m-p-i, x21(i+1,i),& + 1 )**2 ) + phi(i) = atan2( s, c ) + end if + end do + ! reduce the bottom-right portion of x11 to [ i 0 ] + do i = m - q + 1, p + call stdlib_dlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + x11(i,i) = one + call stdlib_dlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_dlarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & + work(ilarf) ) + end do + ! reduce the bottom-right portion of x21 to [ 0 i ] + do i = p + 1, q + call stdlib_dlarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) + + x21(m-q+i-p,i) = one + call stdlib_dlarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& + , ldx21, work(ilarf) ) + end do + return + end subroutine stdlib_dorbdb4 + + !> DORCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The orthogonal matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + + subroutine stdlib_dorcsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, iwork, info ) + ! -- lapack computational routine (3.5.0_dp) -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q + ! Array Arguments + real(dp), intent(out) :: theta(*) + real(dp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) + real(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & + lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & + r + logical(lk) :: lquery, wantu1, wantu2, wantv1t + ! Local Arrays + real(dp) :: dum1(1), dum2(1,1) + ! Intrinsic Function + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + lquery = lwork == -1 + if( m < 0 ) then + info = -4 + else if( p < 0 .or. p > m ) then + info = -5 + else if( q < 0 .or. q > m ) then + info = -6 + else if( ldx11 < max( 1, p ) ) then + info = -8 + else if( ldx21 < max( 1, m-p ) ) then + info = -10 + else if( wantu1 .and. ldu1 < max( 1, p ) ) then + info = -13 + else if( wantu2 .and. ldu2 < max( 1, m - p ) ) then + info = -15 + else if( wantv1t .and. ldv1t < max( 1, q ) ) then + info = -17 + end if + r = min( p, m-p, q, m-q ) + ! compute workspace + ! work layout: + ! |-------------------------------------------------------| + ! | lworkopt (1) | + ! |-------------------------------------------------------| + ! | phi (max(1,r-1)) | + ! |-------------------------------------------------------| + ! | taup1 (max(1,p)) | b11d (r) | + ! | taup2 (max(1,m-p)) | b11e (r-1) | + ! | tauq1 (max(1,q)) | b12d (r) | + ! |-----------------------------------------| b12e (r-1) | + ! | stdlib_dorbdb work | stdlib_dorgqr work | stdlib_dorglq work | b21d (r) | + ! | | | | b21e (r-1) | + ! | | | | b22d (r) | + ! | | | | b22e (r-1) | + ! | | | | stdlib_dbbcsd work | + ! |-------------------------------------------------------| + if( info == 0 ) then + iphi = 2 + ib11d = iphi + max( 1, r-1 ) + ib11e = ib11d + max( 1, r ) + ib12d = ib11e + max( 1, r - 1 ) + ib12e = ib12d + max( 1, r ) + ib21d = ib12e + max( 1, r - 1 ) + ib21e = ib21d + max( 1, r ) + ib22d = ib21e + max( 1, r - 1 ) + ib22e = ib22d + max( 1, r ) + ibbcsd = ib22e + max( 1, r - 1 ) + itaup1 = iphi + max( 1, r-1 ) + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m-p ) + iorbdb = itauq1 + max( 1, q ) + iorgqr = itauq1 + max( 1, q ) + iorglq = itauq1 + max( 1, q ) + lorgqrmin = 1 + lorgqropt = 1 + lorglqmin = 1 + lorglqopt = 1 + if( r == q ) then + call stdlib_dorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1, work,-1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_dorgqr( p, p, q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + endif + if( wantu2 .and. m-p > 0 ) then + call stdlib_dorgqr( m-p, m-p, q, u2, ldu2, dum1, work(1),-1, childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dorglq( q-1, q-1, q-1, v1t, ldv1t,dum1, work(1), -1, childinfo ) + + lorglqmin = max( lorglqmin, q-1 ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_dbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum1, u1, & + ldu1, u2, ldu2, v1t, ldv1t,dum2, 1, dum1, dum1, dum1,dum1, dum1, dum1, dum1,dum1,& + work(1), -1, childinfo ) + lbbcsd = int( work(1),KIND=ilp) + else if( r == p ) then + call stdlib_dorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1,work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_dorgqr( p-1, p-1, p-1, u1(2,2), ldu1, dum1,work(1), -1, childinfo & + ) + lorgqrmin = max( lorgqrmin, p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_dorgqr( m-p, m-p, q, u2, ldu2, dum1, work(1),-1, childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dorglq( q, q, r, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_dbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum1, v1t, & + ldv1t, dum2, 1, u1, ldu1,u2, ldu2, dum1, dum1, dum1,dum1, dum1, dum1, dum1,dum1, & + work(1), -1, childinfo ) + lbbcsd = int( work(1),KIND=ilp) + else if( r == m-p ) then + call stdlib_dorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1,work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_dorgqr( p, p, q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_dorgqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,dum1, work(1), -1, & + childinfo ) + lorgqrmin = max( lorgqrmin, m-p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dorglq( q, q, r, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_dbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum1, & + dum2, 1, v1t, ldv1t, u2,ldu2, u1, ldu1, dum1, dum1, dum1,dum1, dum1, dum1, dum1,& + dum1, work(1), -1, childinfo ) + lbbcsd = int( work(1),KIND=ilp) + else + call stdlib_dorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1,dum1, work(1), -1, childinfo ) + lorbdb = m + int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_dorgqr( p, p, m-q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_dorgqr( m-p, m-p, m-q, u2, ldu2, dum1, work(1),-1, childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dorglq( q, q, q, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_dbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum1, u2, & + ldu2, u1, ldu1, dum2,1, v1t, ldv1t, dum1, dum1, dum1,dum1, dum1, dum1, dum1,dum1,& + work(1), -1, childinfo ) + lbbcsd = int( work(1),KIND=ilp) + end if + lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1,ibbcsd+lbbcsd-& + 1 ) + lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1,ibbcsd+lbbcsd-& + 1 ) + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -19 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DORCSD2BY1', -info ) + return + else if( lquery ) then + return + end if + lorgqr = lwork-iorgqr+1 + lorglq = lwork-iorglq+1 + ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, + ! in which r = min(p,m-p,q,m-q) + if( r == q ) then + ! case 1: r = q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_dorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_dlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_dorgqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_dlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_dorgqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + v1t(1,1) = one + do j = 2, q + v1t(1,j) = zero + v1t(j,1) = zero + end do + call stdlib_dlacpy( 'U', q-1, q-1, x21(1,2), ldx21, v1t(2,2),ldv1t ) + call stdlib_dorglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglq, childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_dbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,work(iphi), u1, & + ldu1, u2, ldu2, v1t, ldv1t,dum2, 1, work(ib11d), work(ib11e),work(ib12d), work(& + ib12e), work(ib21d),work(ib21e), work(ib22d), work(ib22e),work(ibbcsd), lbbcsd, & + childinfo ) + ! permute rows and columns to place zero submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_dlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == p ) then + ! case 2: r = p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_dorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + u1(1,1) = one + do j = 2, p + u1(1,j) = zero + u1(j,1) = zero + end do + call stdlib_dlacpy( 'L', p-1, p-1, x11(2,1), ldx11, u1(2,2), ldu1 ) + call stdlib_dorgqr( p-1, p-1, p-1, u1(2,2), ldu1, work(itaup1),work(iorgqr), & + lorgqr, childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_dlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_dorgqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dlacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) + call stdlib_dorglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_dbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,work(iphi), v1t, & + ldv1t, dum2, 1, u1, ldu1, u2,ldu2, work(ib11d), work(ib11e), work(ib12d),work(ib12e)& + , work(ib21d), work(ib21e),work(ib22d), work(ib22e), work(ibbcsd), lbbcsd,childinfo & + ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_dlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == m-p ) then + ! case 3: r = m-p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_dorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_dlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_dorgqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + u2(1,1) = one + do j = 2, m-p + u2(1,j) = zero + u2(j,1) = zero + end do + call stdlib_dlacpy( 'L', m-p-1, m-p-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_dorgqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,work(itaup2), work(iorgqr)& + , lorgqr, childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dlacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) + call stdlib_dorglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_dbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, work(iphi), & + dum2, 1, v1t, ldv1t, u2,ldu2, u1, ldu1, work(ib11d), work(ib11e),work(ib12d), work(& + ib12e), work(ib21d),work(ib21e), work(ib22d), work(ib22e),work(ibbcsd), lbbcsd, & + childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > r ) then + do i = 1, r + iwork(i) = q - r + i + end do + do i = r + 1, q + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_dlapmt( .false., p, q, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_dlapmr( .false., q, q, v1t, ldv1t, iwork ) + end if + end if + else + ! case 4: r = m-q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_dorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, childinfo ) + + ! accumulate householder reflectors + if( wantu2 .and. m-p > 0 ) then + call stdlib_dcopy( m-p, work(iorbdb+p), 1, u2, 1 ) + end if + if( wantu1 .and. p > 0 ) then + call stdlib_dcopy( p, work(iorbdb), 1, u1, 1 ) + do j = 2, p + u1(1,j) = zero + end do + call stdlib_dlacpy( 'L', p-1, m-q-1, x11(2,1), ldx11, u1(2,2),ldu1 ) + call stdlib_dorgqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + do j = 2, m-p + u2(1,j) = zero + end do + call stdlib_dlacpy( 'L', m-p-1, m-q-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_dorgqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_dlacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) + call stdlib_dlacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& + 1), ldv1t ) + call stdlib_dlacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) + + call stdlib_dorglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_dbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, work(iphi), & + u2, ldu2, u1, ldu1, dum2,1, v1t, ldv1t, work(ib11d), work(ib11e),work(ib12d), work(& + ib12e), work(ib21d),work(ib21e), work(ib22d), work(ib22e),work(ibbcsd), lbbcsd, & + childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( p > r ) then + do i = 1, r + iwork(i) = p - r + i + end do + do i = r + 1, p + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_dlapmt( .false., p, p, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_dlapmr( .false., p, q, v1t, ldv1t, iwork ) + end if + end if + end if + return + end subroutine stdlib_dorcsd2by1 + + !> DORGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> DSYTRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_dorgtr( uplo, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, j, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + ! generate q(2:n,2:n) + call stdlib_dorgqr( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dorgtr + + !> DORGTSQR: generates an M-by-N real matrix Q_out with orthonormal columns, + !> which are the first N columns of a product of real orthogonal + !> matrices of order M which are returned by DLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for DLATSQR. + + pure subroutine stdlib_dorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + lquery = lwork==-1 + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. m DORMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by DSYTRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_dormtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery, upper + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )& + then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda DPBTRF: computes the Cholesky factorization of a real symmetric + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_dpbtrf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 32 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ib, ii, j, jj, nb + ! Local Arrays + real(dp) :: work(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( .not.stdlib_lsame( uplo, 'U' ) ) .and.( .not.stdlib_lsame( uplo, 'L' ) ) ) & + then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldabkd ) then + ! use unblocked code + call stdlib_dpbtf2( uplo, n, kd, ab, ldab, info ) + else + ! use blocked code + if( stdlib_lsame( uplo, 'U' ) ) then + ! compute the cholesky factorization of a symmetric band + ! matrix, given the upper triangle of the matrix in band + ! storage. + ! zero the upper triangle of the work array. + do j = 1, nb + do i = 1, j - 1 + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_70: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_dpotf2( uplo, ib, ab( kd+1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 a12 a13 + ! a22 a23 + ! a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a12, a22 and + ! a23 are empty if ib = kd. the upper triangle of a13 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a12 + call stdlib_dtrsm( 'LEFT', 'UPPER', 'TRANSPOSE','NON-UNIT', ib, i2, one,& + ab( kd+1, i ),ldab-1, ab( kd+1-ib, i+ib ), ldab-1 ) + ! update a22 + call stdlib_dsyrk( 'UPPER', 'TRANSPOSE', i2, ib, -one,ab( kd+1-ib, i+ib & + ), ldab-1, one,ab( kd+1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the lower triangle of a13 into the work array. + do jj = 1, i3 + do ii = jj, ib + work( ii, jj ) = ab( ii-jj+1, jj+i+kd-1 ) + end do + end do + ! update a13 (in the work array). + call stdlib_dtrsm( 'LEFT', 'UPPER', 'TRANSPOSE','NON-UNIT', ib, i3, one,& + ab( kd+1, i ),ldab-1, work, ldwork ) + ! update a23 + if( i2>0 )call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', i2, i3,ib, -& + one, ab( kd+1-ib, i+ib ),ldab-1, work, ldwork, one,ab( 1+ib, i+kd ), & + ldab-1 ) + ! update a33 + call stdlib_dsyrk( 'UPPER', 'TRANSPOSE', i3, ib, -one,work, ldwork, one,& + ab( kd+1, i+kd ),ldab-1 ) + ! copy the lower triangle of a13 back into place. + do jj = 1, i3 + do ii = jj, ib + ab( ii-jj+1, jj+i+kd-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_70 + else + ! compute the cholesky factorization of a symmetric band + ! matrix, given the lower triangle of the matrix in band + ! storage. + ! zero the lower triangle of the work array. + do j = 1, nb + do i = j + 1, nb + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_140: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_dpotf2( uplo, ib, ab( 1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 + ! a21 a22 + ! a31 a32 a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a21, a22 and + ! a32 are empty if ib = kd. the lower triangle of a31 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a21 + call stdlib_dtrsm( 'RIGHT', 'LOWER', 'TRANSPOSE','NON-UNIT', i2, ib, & + one, ab( 1, i ),ldab-1, ab( 1+ib, i ), ldab-1 ) + ! update a22 + call stdlib_dsyrk( 'LOWER', 'NO TRANSPOSE', i2, ib, -one,ab( 1+ib, i ), & + ldab-1, one,ab( 1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the upper triangle of a31 into the work array. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + work( ii, jj ) = ab( kd+1-jj+ii, jj+i-1 ) + end do + end do + ! update a31 (in the work array). + call stdlib_dtrsm( 'RIGHT', 'LOWER', 'TRANSPOSE','NON-UNIT', i3, ib, & + one, ab( 1, i ),ldab-1, work, ldwork ) + ! update a32 + if( i2>0 )call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', i3, i2,ib, -& + one, work, ldwork,ab( 1+ib, i ), ldab-1, one,ab( 1+kd-ib, i+ib ), ldab-& + 1 ) + ! update a33 + call stdlib_dsyrk( 'LOWER', 'NO TRANSPOSE', i3, ib, -one,work, ldwork, & + one, ab( 1, i+kd ),ldab-1 ) + ! copy the upper triangle of a31 back into place. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + ab( kd+1-jj+ii, jj+i-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_140 + end if + end if + return + 150 continue + return + end subroutine stdlib_dpbtrf + + !> DPFTRI: computes the inverse of a (real) symmetric positive definite + !> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + !> computed by DPFTRF. + + pure subroutine stdlib_dpftri( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DPFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_dtftri( transr, uplo, 'N', n, a, info ) + if( info>0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution of triangular matrix multiply: inv(u)*inv(u)^c or + ! inv(l)^c*inv(l). there are eight cases. + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_dlauum( 'L', n1, a( 0 ), n, info ) + call stdlib_dsyrk( 'L', 'T', n1, n2, one, a( n1 ), n, one,a( 0 ), n ) + call stdlib_dtrmm( 'L', 'U', 'N', 'N', n2, n1, one, a( n ), n,a( n1 ), n ) + + call stdlib_dlauum( 'U', n2, a( n ), n, info ) + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_dlauum( 'L', n1, a( n2 ), n, info ) + call stdlib_dsyrk( 'L', 'N', n1, n2, one, a( 0 ), n, one,a( n2 ), n ) + call stdlib_dtrmm( 'R', 'U', 'T', 'N', n1, n2, one, a( n1 ), n,a( 0 ), n ) + + call stdlib_dlauum( 'U', n2, a( n1 ), n, info ) + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose, and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_dlauum( 'U', n1, a( 0 ), n1, info ) + call stdlib_dsyrk( 'U', 'N', n1, n2, one, a( n1*n1 ), n1, one,a( 0 ), n1 ) + + call stdlib_dtrmm( 'R', 'L', 'N', 'N', n1, n2, one, a( 1 ), n1,a( n1*n1 ), n1 & + ) + call stdlib_dlauum( 'L', n2, a( 1 ), n1, info ) + else + ! srpa for upper, transpose, and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_dlauum( 'U', n1, a( n2*n2 ), n2, info ) + call stdlib_dsyrk( 'U', 'T', n1, n2, one, a( 0 ), n2, one,a( n2*n2 ), n2 ) + + call stdlib_dtrmm( 'L', 'L', 'T', 'N', n2, n1, one, a( n1*n2 ),n2, a( 0 ), n2 & + ) + call stdlib_dlauum( 'L', n2, a( n1*n2 ), n2, info ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_dlauum( 'L', k, a( 1 ), n+1, info ) + call stdlib_dsyrk( 'L', 'T', k, k, one, a( k+1 ), n+1, one,a( 1 ), n+1 ) + + call stdlib_dtrmm( 'L', 'U', 'N', 'N', k, k, one, a( 0 ), n+1,a( k+1 ), n+1 ) + + call stdlib_dlauum( 'U', k, a( 0 ), n+1, info ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_dlauum( 'L', k, a( k+1 ), n+1, info ) + call stdlib_dsyrk( 'L', 'N', k, k, one, a( 0 ), n+1, one,a( k+1 ), n+1 ) + + call stdlib_dtrmm( 'R', 'U', 'T', 'N', k, k, one, a( k ), n+1,a( 0 ), n+1 ) + + call stdlib_dlauum( 'U', k, a( k ), n+1, info ) + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose, and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1), + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_dlauum( 'U', k, a( k ), k, info ) + call stdlib_dsyrk( 'U', 'N', k, k, one, a( k*( k+1 ) ), k, one,a( k ), k ) + + call stdlib_dtrmm( 'R', 'L', 'N', 'N', k, k, one, a( 0 ), k,a( k*( k+1 ) ), k & + ) + call stdlib_dlauum( 'L', k, a( 0 ), k, info ) + else + ! srpa for upper, transpose, and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0), + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_dlauum( 'U', k, a( k*( k+1 ) ), k, info ) + call stdlib_dsyrk( 'U', 'T', k, k, one, a( 0 ), k, one,a( k*( k+1 ) ), k ) + + call stdlib_dtrmm( 'L', 'L', 'T', 'N', k, k, one, a( k*k ), k,a( 0 ), k ) + + call stdlib_dlauum( 'L', k, a( k*k ), k, info ) + end if + end if + end if + return + end subroutine stdlib_dpftri + + !> DPOTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dpotrf( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code. + call stdlib_dpotrf2( uplo, n, a, lda, info ) + else + ! use blocked code. + if( upper ) then + ! compute the cholesky factorization a = u**t*u. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_dsyrk( 'UPPER', 'TRANSPOSE', jb, j-1, -one,a( 1, j ), lda, one, a(& + j, j ), lda ) + call stdlib_dpotrf2( 'UPPER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block row. + call stdlib_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', jb, n-j-jb+1,j-1, -one, a( & + 1, j ), lda, a( 1, j+jb ),lda, one, a( j, j+jb ), lda ) + call stdlib_dtrsm( 'LEFT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',jb, n-j-jb+1, & + one, a( j, j ), lda,a( j, j+jb ), lda ) + end if + end do + else + ! compute the cholesky factorization a = l*l**t. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_dsyrk( 'LOWER', 'NO TRANSPOSE', jb, j-1, -one,a( j, 1 ), lda, one,& + a( j, j ), lda ) + call stdlib_dpotrf2( 'LOWER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block column. + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', n-j-jb+1, jb,j-1, -one, a( & + j+jb, 1 ), lda, a( j, 1 ),lda, one, a( j+jb, j ), lda ) + call stdlib_dtrsm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',n-j-jb+1, jb, & + one, a( j, j ), lda,a( j+jb, j ), lda ) + end if + end do + end if + end if + go to 40 + 30 continue + info = info + j - 1 + 40 continue + return + end subroutine stdlib_dpotrf + + !> DPTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + + pure subroutine stdlib_dptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(in) :: b(ldb,*), d(*), df(*), e(*), ef(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*) + real(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + integer(ilp) :: count, i, ix, j, nz + real(dp) :: bi, cx, dx, eps, ex, lstres, s, safe1, safe2, safmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_dpttrs( n, 1, df, ef, work( n+1 ), n, info ) + call stdlib_daxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + ix = stdlib_idamax( n, work, 1 ) + ferr( j ) = work( ix ) + ! estimate the norm of inv(a). + ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by + ! m(i,j) = abs(a(i,j)), i = j, + ! m(i,j) = -abs(a(i,j)), i .ne. j, + ! and e = [ 1, 1, ..., 1 ]**t. note m(a) = m(l)*d*m(l)**t. + ! solve m(l) * x = e. + work( 1 ) = one + do i = 2, n + work( i ) = one + work( i-1 )*abs( ef( i-1 ) ) + end do + ! solve d * m(l)**t * x = b. + work( n ) = work( n ) / df( n ) + do i = n - 1, 1, -1 + work( i ) = work( i ) / df( i ) + work( i+1 )*abs( ef( i ) ) + end do + ! compute norm(inv(a)) = max(x(i)), 1<=i<=n. + ix = stdlib_idamax( n, work, 1 ) + ferr( j ) = ferr( j )*abs( work( ix ) ) + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_90 + return + end subroutine stdlib_dptrfs + + !> DPTSV: computes the solution to a real system of linear equations + !> A*X = B, where A is an N-by-N symmetric positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**T, and the factored form of A is then + !> used to solve the system of equations. + + pure subroutine stdlib_dptsv( n, nrhs, d, e, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: b(ldb,*), d(*), e(*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb DPTSVX: uses the factorization A = L*D*L**T to compute the solution + !> to a real system of linear equations A*X = B, where A is an N-by-N + !> symmetric positive definite tridiagonal matrix and X and B are + !> N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_dptsvx( fact, n, nrhs, d, e, df, ef, b, ldb, x, ldx,rcond, ferr, berr,& + work, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(in) :: b(ldb,*), d(*), e(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + real(dp), intent(inout) :: df(*), ef(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb1 )call stdlib_dcopy( n-1, e, 1, ef, 1 ) + call stdlib_dpttrf( n, df, ef, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_dlanst( '1', n, d, e ) + ! compute the reciprocal of the condition number of a. + call stdlib_dptcon( n, df, ef, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dpttrs( n, nrhs, df, ef, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_dptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr, berr,work, info ) + + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DSBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. + + subroutine stdlib_dsbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, wantz + integer(ilp) :: iinfo, imax, inde, indwrk, iscale + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_dlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_dlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_dsbtrd to reduce symmetric band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + call stdlib_dsbtrd( jobz, uplo, n, kd, ab, ldab, w, work( inde ), z, ldz,work( indwrk )& + , iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, call stdlib_ssteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + call stdlib_dsteqr( jobz, n, w, work( inde ), z, ldz, work( indwrk ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_dsbev + + !> DSBEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric band matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_dsbevx( jobz, range, uplo, n, kd, ab, ldab, q, ldq, vl,vu, il, iu, abstol, & + m, w, z, ldz, work, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, kd, ldab, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(out) :: q(ldq,*), w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indwrk, & + iscale, itmp1, j, jj, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=tmp1 ) )m = 0 + end if + if( m==1 ) then + w( 1 ) = tmp1 + if( wantz )z( 1, 1 ) = one + end if + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_dlansb( 'M', uplo, n, kd, ab, ldab, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_dlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_dlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_dsbtrd to reduce symmetric band matrix to tridiagonal form. + indd = 1 + inde = indd + n + indwrk = inde + n + call stdlib_dsbtrd( jobz, uplo, n, kd, ab, ldab, work( indd ),work( inde ), q, ldq, & + work( indwrk ), iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_dsterf or stdlib_ssteqr. if this fails for some + ! eigenvalue, then try stdlib_dstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_dcopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsterf( n, w, work( indee ), info ) + else + call stdlib_dlacpy( 'A', n, n, q, ldq, z, ldz ) + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_dstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_dstein. + do j = 1, m + call stdlib_dcopy( n, z( 1, j ), 1, work( 1 ), 1 ) + call stdlib_dgemv( 'N', n, n, one, q, ldq, work, 1, zero,z( 1, j ), 1 ) + end do + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric + !> and banded, and B is also positive definite. + + pure subroutine stdlib_dsbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, n + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwrk + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab DSBGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric + !> and banded, and B is also positive definite. Eigenvalues and + !> eigenvectors can be selected by specifying either all eigenvalues, + !> a range of values or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_dsbgvx( jobz, range, uplo, n, ka, kb, ab, ldab, bb,ldbb, q, ldq, vl, & + vu, il, iu, abstol, m, w, z,ldz, work, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ka, kb, ldab, ldbb, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(dp), intent(out) :: q(ldq,*), w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, upper, valeig, wantz + character :: order, vect + integer(ilp) :: i, iinfo, indd, inde, indee, indibl, indisp, indiwo, indwrk, itmp1, j, & + jj, nsplit + real(dp) :: tmp1 + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ka<0 ) then + info = -5 + else if( kb<0 .or. kb>ka ) then + info = -6 + else if( ldab0 .and. vu<=vl )info = -14 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -15 + else if ( iun ) then + info = -16 + end if + end if + end if + if( info==0) then + if( ldz<1 .or. ( wantz .and. ldz DSGESV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> DSGESV first attempts to factorize the matrix in SINGLE PRECISION + !> and use this factorization within an iterative refinement procedure + !> to produce a solution with DOUBLE PRECISION normwise backward error + !> quality (see below). If the approach fails the method switches to a + !> DOUBLE PRECISION factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio SINGLE PRECISION performance over DOUBLE PRECISION + !> performance is too small. A reasonable strategy should take the + !> number of right-hand sides and the size of the matrix into account. + !> This might be done with a call to ILAENV in the future. Up to now, we + !> always try iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_dsgesv( n, nrhs, a, lda, ipiv, b, ldb, x, ldx, work,swork, iter, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(out) :: swork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: b(ldb,*) + real(dp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(dp), parameter :: bwdmax = 1.0e+00_dp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(dp) :: anrm, cte, eps, rnrm, xnrm + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from double precision to single precision + ! and store the result in sx. + call stdlib_dlag2s( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_sgetrs( 'NO TRANSPOSE', n, nrhs, swork( ptsa ), n, ipiv,swork( ptsx ), & + n, info ) + ! convert sx back to double precision and update the current + ! iterate. + call stdlib_slag2d( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_dlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n, nrhs, n, negone,a, lda, x, & + ldx, one, work, n ) + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = abs( x( stdlib_idamax( n, x( 1, i ), 1 ), i ) ) + rnrm = abs( work( stdlib_idamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the + ! stopping criterion, set up the iter flag accordingly and follow up + ! on double precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to double precision. + call stdlib_dgetrf( n, n, a, lda, ipiv, info ) + if( info/=0 )return + call stdlib_dlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dgetrs( 'NO TRANSPOSE', n, nrhs, a, lda, ipiv, x, ldx,info ) + return + end subroutine stdlib_dsgesv + + !> DSPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A in packed storage. + + subroutine stdlib_dspev( jobz, uplo, n, ap, w, z, ldz, work, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_dscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_dsptrd to reduce symmetric packed matrix to tridiagonal form. + inde = 1 + indtau = inde + n + call stdlib_dsptrd( uplo, n, ap, w, work( inde ), work( indtau ), iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_dopgtr to generate the orthogonal matrix, then call stdlib_dsteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + indwrk = indtau + n + call stdlib_dopgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_dsteqr( jobz, n, w, work( inde ), z, ldz, work( indtau ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_dspev + + !> DSPEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A in packed storage. Eigenvalues/vectors + !> can be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_dspevx( jobz, range, uplo, n, ap, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, iwork, ifail,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indtau, & + indwrk, iscale, itmp1, j, jj, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=ap( 1 ) ) then + m = 1 + w( 1 ) = ap( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_dlansp( 'M', uplo, n, ap, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_dscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_dsptrd to reduce symmetric packed matrix to tridiagonal form. + indtau = 1 + inde = indtau + n + indd = inde + n + indwrk = indd + n + call stdlib_dsptrd( uplo, n, ap, work( indd ), work( inde ),work( indtau ), iinfo ) + + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_dsterf or stdlib_dopgtr and stdlib_ssteqr. if this fails + ! for some eigenvalue, then try stdlib_dstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_dcopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsterf( n, w, work( indee ), info ) + else + call stdlib_dopgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_dstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_dstein. + call stdlib_dopmtr( 'L', uplo, 'N', n, m, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric, stored in packed format, + !> and B is also positive definite. + + subroutine stdlib_dspgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, n + ! Array Arguments + real(dp), intent(inout) :: ap(*), bp(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: trans + integer(ilp) :: j, neig + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, neig + call stdlib_dtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_dtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_dspgv + + !> DSPGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A + !> and B are assumed to be symmetric, stored in packed storage, and B + !> is also positive definite. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of indices + !> for the desired eigenvalues. + + subroutine stdlib_dspgvx( itype, jobz, range, uplo, n, ap, bp, vl, vu,il, iu, abstol, m, w, & + z, ldz, work, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: ap(*), bp(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: alleig, indeig, upper, valeig, wantz + character :: trans + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + upper = stdlib_lsame( uplo, 'U' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else + if( valeig ) then + if( n>0 .and. vu<=vl ) then + info = -9 + end if + else if( indeig ) then + if( il<1 ) then + info = -10 + else if( iun ) then + info = -11 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, m + call stdlib_dtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, m + call stdlib_dtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_dspgvx + + !> DSPOSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> DSPOSV first attempts to factorize the matrix in SINGLE PRECISION + !> and use this factorization within an iterative refinement procedure + !> to produce a solution with DOUBLE PRECISION normwise backward error + !> quality (see below). If the approach fails the method switches to a + !> DOUBLE PRECISION factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio SINGLE PRECISION performance over DOUBLE PRECISION + !> performance is too small. A reasonable strategy should take the + !> number of right-hand sides and the size of the matrix into account. + !> This might be done with a call to ILAENV in the future. Up to now, we + !> always try iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_dsposv( uplo, n, nrhs, a, lda, b, ldb, x, ldx, work,swork, iter, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(out) :: swork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: b(ldb,*) + real(dp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(dp), parameter :: bwdmax = 1.0e+00_dp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(dp) :: anrm, cte, eps, rnrm, xnrm + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from double precision to single precision + ! and store the result in sx. + call stdlib_dlag2s( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_spotrs( uplo, n, nrhs, swork( ptsa ), n, swork( ptsx ), n,info ) + ! convert sx back to double precision and update the current + ! iterate. + call stdlib_slag2d( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_dlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_dsymm( 'L', uplo, n, nrhs, negone, a, lda, x, ldx, one,work, n ) + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = abs( x( stdlib_idamax( n, x( 1, i ), 1 ), i ) ) + rnrm = abs( work( stdlib_idamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the + ! stopping criterion, set up the iter flag accordingly and follow + ! up on double precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to double precision. + call stdlib_dpotrf( uplo, n, a, lda, info ) + if( info/=0 )return + call stdlib_dlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dpotrs( uplo, n, nrhs, a, lda, x, ldx, info ) + return + end subroutine stdlib_dsposv + + !> DSYEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. + + subroutine stdlib_dsyev( jobz, uplo, n, a, lda, w, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_dlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_dsytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_dsytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_dorgtr to generate the orthogonal matrix, then call stdlib_dsteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + call stdlib_dorgtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + call stdlib_dsteqr( jobz, n, w, work( inde ), a, lda, work( indtau ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_dsyev + + !> DSYEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of indices + !> for the desired eigenvalues. + + subroutine stdlib_dsyevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indtau, & + indwkn, indwrk, iscale, itmp1, j, jj, llwork, llwrkn, lwkmin, lwkopt, nb, & + nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_dlansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_dscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_dscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_dsytrd to reduce symmetric matrix to tridiagonal form. + indtau = 1 + inde = indtau + n + indd = inde + n + indwrk = indd + n + llwork = lwork - indwrk + 1 + call stdlib_dsytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_dsterf or stdlib_dorgtr and stdlib_ssteqr. if this fails for + ! some eigenvalue, then try stdlib_dstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_dcopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsterf( n, w, work( indee ), info ) + else + call stdlib_dlacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_dorgtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_dstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_dstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_dormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric and B is also + !> positive definite. + + subroutine stdlib_dsygv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_dtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_dtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dsygv + + !> DSYGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A + !> and B are assumed to be symmetric and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_dsygvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + upper = stdlib_lsame( uplo, 'U' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_dtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_dtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_dsygvx + + !> DSYSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_dsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a real system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_dsysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(inout) :: af(ldaf,*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_dlansy( 'I', uplo, n, a, lda, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_dsycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, iwork,info ) + ! compute the solution vectors x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dsytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_dsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DSYTRD_SY2SB: reduces a real symmetric matrix A to real symmetric + !> band-diagonal form AB by a orthogonal similarity transformation: + !> Q**T * A * Q = AB. + + pure subroutine stdlib_dsytrd_sy2sb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: rone = 1.0e+0_dp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'DSYTRD_SY2SB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda DTGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of real matrices (S,P), where S is a quasi-triangular matrix + !> and P is upper triangular. Matrix pairs of this type are produced by + !> the generalized Schur factorization of a matrix pair (A,B): + !> A = Q*S*Z**T, B = Q*P*Z**T + !> as computed by DGGHRD + DHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal blocks of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the orthogonal factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_dtgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(dp), intent(in) :: p(ldp,*), s(lds,*) + real(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: safety = 1.0e+2_dp + + ! Local Scalars + logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, & + lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, & + na, nw + real(dp) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, & + bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, & + salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale + ! Local Arrays + real(dp) :: bdiag(2), sum(2,2), sums(2,2), sump(2,2) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + ilall = .true. + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds1 )anorm = anorm + abs( s( 2, 1 ) ) + bnorm = abs( p( 1, 1 ) ) + work( 1 ) = zero + work( n+1 ) = zero + do j = 2, n + temp = zero + temp2 = zero + if( s( j, j-1 )==zero ) then + iend = j - 1 + else + iend = j - 2 + end if + do i = 1, iend + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + work( j ) = temp + work( n+j ) = temp2 + do i = iend + 1, min( j+1, n ) + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + anorm = max( anorm, temp ) + bnorm = max( bnorm, temp2 ) + end do + ascale = one / max( anorm, safmin ) + bscale = one / max( bnorm, safmin ) + ! left eigenvectors + if( compl ) then + ieig = 0 + ! main loop over eigenvalues + ilcplx = .false. + loop_220: do je = 1, n + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at. + if( ilcplx ) then + ilcplx = .false. + cycle loop_220 + end if + nw = 1 + if( je=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + temp = acoef*s( je+1, je ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je+1 ) = -temp2r / temp + work( 3*n+je+1 ) = -temp2i / temp + else + work( 2*n+je+1 ) = one + work( 3*n+je+1 ) = zero + temp = acoef*s( je, je+1 ) + work( 2*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je+1, je+1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je+1 ) & + )+abs( work( 3*n+je+1 ) ) ) + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! t + ! triangular solve of (a a - b b) y = 0 + ! t + ! (rowwise in (a a - b b) , or columnwise in (a a - b b) ) + il2by2 = .false. + loop_160: do j = je + nw, n + if( il2by2 ) then + il2by2 = .false. + cycle loop_160 + end if + na = 1 + bdiag( 1 ) = p( j, j ) + if( jbignum*xscale ) then + do jw = 0, nw - 1 + do jr = je, j - 1 + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute dot products + ! j-1 + ! sum = sum conjg( a*s(k,j) - b*p(k,j) )*x(k) + ! k=je + ! to reduce the op count, this is done as + ! _ j-1 _ j-1 + ! a*conjg( sum s(k,j)*x(k) ) - b*conjg( sum p(k,j)*x(k) ) + ! k=je k=je + ! which may cause underflow problems if a or b are close + ! to underflow. (e.g., less than small.) + do jw = 1, nw + do ja = 1, na + sums( ja, jw ) = zero + sump( ja, jw ) = zero + do jr = je, j - 1 + sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + end do + end do + end do + do ja = 1, na + if( ilcplx ) then + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) -bcoefi*sump( & + ja, 2 ) + sum( ja, 2 ) = -acoef*sums( ja, 2 ) +bcoefr*sump( ja, 2 ) +bcoefi*sump( & + ja, 1 ) + else + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) + end if + end do + ! t + ! solve ( a a - b b ) y = sum(,) + ! with scaling and perturbation of the denominator + call stdlib_dlaln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1 ), & + bdiag( 2 ), sum, 2, bcoefr,bcoefi, work( 2*n+j ), n, scale, temp,iinfo ) + + if( scalesafmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = ibeg, n + vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw ) + end do + end do + end if + ieig = ieig + nw - 1 + end do loop_220 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + ilcplx = .false. + loop_500: do je = n, 1, -1 + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at -- if complex, select(je) + ! or select(je-1). + ! if this is a complex pair, the 2-by-2 diagonal block + ! corresponding to the eigenvalue is in rows/columns je-1:je + if( ilcplx ) then + ilcplx = .false. + cycle loop_500 + end if + nw = 1 + if( je>1 ) then + if( s( je, je-1 )/=zero ) then + ilcplx = .true. + nw = 2 + end if + end if + if( ilall ) then + ilcomp = .true. + else if( ilcplx ) then + ilcomp = select( je ) .or. select( je-1 ) + else + ilcomp = select( je ) + end if + if( .not.ilcomp )cycle loop_500 + ! decide if (a) singular pencil, (b) real eigenvalue, or + ! (c) complex eigenvalue. + if( .not.ilcplx ) then + if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then + ! singular matrix pencil -- unit eigenvector + ieig = ieig - 1 + do jr = 1, n + vr( jr, ieig ) = zero + end do + vr( ieig, ieig ) = one + cycle loop_500 + end if + end if + ! clear vector + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+2 )*n+jr ) = zero + end do + end do + ! compute coefficients in ( a a - b b ) x = 0 + ! a is acoef + ! b is bcoefr + i*bcoefi + if( .not.ilcplx ) then + ! real eigenvalue + temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin & + ) + salfar = ( temp*s( je, je ) )*ascale + sbeta = ( temp*p( je, je ) )*bscale + acoef = sbeta*ascale + bcoefr = salfar*bscale + bcoefi = zero + ! scale to avoid underflow + scale = one + lsa = abs( sbeta )>=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + ! and contribution to sums + temp = acoef*s( je, je-1 ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je-1 ) = -temp2r / temp + work( 3*n+je-1 ) = -temp2i / temp + else + work( 2*n+je-1 ) = one + work( 3*n+je-1 ) = zero + temp = acoef*s( je-1, je ) + work( 2*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je-1, je-1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je-1 ) & + )+abs( work( 3*n+je-1 ) ) ) + ! compute contribution from columns je and je-1 + ! of a and b to the sums. + creala = acoef*work( 2*n+je-1 ) + cimaga = acoef*work( 3*n+je-1 ) + crealb = bcoefr*work( 2*n+je-1 ) -bcoefi*work( 3*n+je-1 ) + cimagb = bcoefi*work( 2*n+je-1 ) +bcoefr*work( 3*n+je-1 ) + cre2a = acoef*work( 2*n+je ) + cim2a = acoef*work( 3*n+je ) + cre2b = bcoefr*work( 2*n+je ) - bcoefi*work( 3*n+je ) + cim2b = bcoefi*work( 2*n+je ) + bcoefr*work( 3*n+je ) + do jr = 1, je - 2 + work( 2*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, & + je ) + cre2b*p( jr, je ) + work( 3*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, & + je ) + cim2b*p( jr, je ) + end do + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! columnwise triangular solve of (a a - b b) x = 0 + il2by2 = .false. + loop_370: do j = je - nw, 1, -1 + ! if a 2-by-2 block, is in position j-1:j, wait until + ! next iteration to process it (when it will be j:j+1) + if( .not.il2by2 .and. j>1 ) then + if( s( j, j-1 )/=zero ) then + il2by2 = .true. + cycle loop_370 + end if + end if + bdiag( 1 ) = p( j, j ) + if( il2by2 ) then + na = 2 + bdiag( 2 ) = p( j+1, j+1 ) + else + na = 1 + end if + ! compute x(j) (and x(j+1), if 2-by-2 block) + call stdlib_dlaln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1 ), & + bdiag( 2 ), work( 2*n+j ),n, bcoefr, bcoefi, sum, 2, scale, temp,iinfo ) + + if( scale1 ) then + ! check whether scaling is necessary for sum. + xscale = one / max( one, xmax ) + temp = acoefa*work( j ) + bcoefa*work( n+j ) + if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) ) + + temp = max( temp, acoefa, bcoefa ) + if( temp>bignum*xscale ) then + do jw = 0, nw - 1 + do jr = 1, je + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute the contributions of the off-diagonals of + ! column j (and j+1, if 2-by-2 block) of a and b to the + ! sums. + do ja = 1, na + if( ilcplx ) then + creala = acoef*work( 2*n+j+ja-1 ) + cimaga = acoef*work( 3*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) -bcoefi*work( 3*n+j+ja-1 ) + cimagb = bcoefi*work( 2*n+j+ja-1 ) +bcoefr*work( 3*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + work( 3*n+jr ) = work( 3*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(& + jr, j+ja-1 ) + end do + else + creala = acoef*work( 2*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + end do + end if + end do + end if + il2by2 = .false. + end do loop_370 + ! copy eigenvector to vr, back transforming if + ! howmny='b'. + ieig = ieig - nw + if( ilback ) then + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1 ) + end do + ! a series of compiler directives to defeat + ! vectorization for the next loop + do jc = 2, je + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )& + *vr( jr, jc ) + end do + end do + end do + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr ) + end do + end do + iend = n + else + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr ) + end do + end do + iend = je + end if + ! scale eigenvector + xmax = zero + if( ilcplx ) then + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) ) + end do + else + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) ) ) + end do + end if + if( xmax>safmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = 1, iend + vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw ) + end do + end do + end if + end do loop_500 + end if + return + end subroutine stdlib_dtgevc + + !> DTGEX2: swaps adjacent diagonal blocks (A11, B11) and (A22, B22) + !> of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair + !> (A, B) by an orthogonal equivalence transformation. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, & + work, lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2 + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_dcopy by calls to stdlib_dlaset, or by do + ! loops. sven hammarling, 1/5/02. + ! Parameters + real(dp), parameter :: twenty = 2.0e+01_dp + integer(ilp), parameter :: ldst = 4 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, idum, linfo, m + real(dp) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, & + scale, smlnum, thresha, threshb + ! Local Arrays + integer(ilp) :: iwork(ldst) + real(dp) :: ai(2), ar(2), be(2), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(& + ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(& + ldst,ldst) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 .or. n1<=0 .or. n2<=0 )return + if( n1>n .or. ( j1+n1 )>n )return + m = n1 + n2 + if( lwork=sb ) then + call stdlib_dlartg( s( 1, 1 ), s( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + else + call stdlib_dlartg( t( 1, 1 ), t( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + end if + call stdlib_drot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + call stdlib_drot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + li( 2, 2 ) = li( 1, 1 ) + li( 1, 2 ) = -li( 2, 1 ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) ) <= thresha .and.abs( t( 2, 1 ) ) <= threshb + if( .not.weak )go to 70 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_dlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_dgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_dgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_dlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_dlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_dgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_dgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_dlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + call stdlib_drot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_drot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_drot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1, 1 ), li( 2, 1 & + ) ) + call stdlib_drot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1, 1 ), li( 2, 1 & + ) ) + ! set n1-by-n2 (2,1) - blocks to zero. + a( j1+1, j1 ) = zero + b( j1+1, j1 ) = zero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_drot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 & + ) ) + if( wantq )call stdlib_drot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, li( 1, 1 ),li( 2, 1 & + ) ) + ! exit with info = 0 if swap was successfully performed. + return + else + ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2 + ! and 2-by-2 blocks. + ! solve the generalized sylvester equation + ! s11 * r - l * s22 = scale * s12 + ! t11 * r - l * t22 = scale * t12 + ! for r and l. solutions in li and ir. + call stdlib_dlacpy( 'FULL', n1, n2, t( 1, n1+1 ), ldst, li, ldst ) + call stdlib_dlacpy( 'FULL', n1, n2, s( 1, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst ) + + call stdlib_dtgsy2( 'N', 0, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),& + ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,& + linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix ql: + ! ql**t * li = [ tl ] + ! [ 0 ] + ! where + ! li = [ -l ] + ! [ scale * identity(n2) ] + do i = 1, n2 + call stdlib_dscal( n1, -one, li( 1, i ), 1 ) + li( n1+i, i ) = scale + end do + call stdlib_dgeqr2( m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_dorg2r( m, m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix rq: + ! ir * rq**t = [ 0 tr], + ! where ir = [ scale * identity(n1), r ] + do i = 1, n1 + ir( n2+i, i ) = scale + end do + call stdlib_dgerq2( n1, m, ir( n2+1, 1 ), ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_dorgr2( m, m, n1, ir, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + ! perform the swapping tentatively: + call stdlib_dgemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + call stdlib_dgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst ) + call stdlib_dgemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + call stdlib_dgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst ) + call stdlib_dlacpy( 'F', m, m, s, ldst, scpy, ldst ) + call stdlib_dlacpy( 'F', m, m, t, ldst, tcpy, ldst ) + call stdlib_dlacpy( 'F', m, m, ir, ldst, ircop, ldst ) + call stdlib_dlacpy( 'F', m, m, li, ldst, licop, ldst ) + ! triangularize the b-part by an rq factorization. + ! apply transformation (from left) to a-part, giving s. + call stdlib_dgerq2( m, m, t, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_dormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo ) + if( linfo/=0 )go to 70 + call stdlib_dormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo ) + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in brqa21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_dlassq( n1, s( n2+1, i ), 1, dscale, dsum ) + end do + brqa21 = dscale*sqrt( dsum ) + ! triangularize the b-part by a qr factorization. + ! apply transformation (from right) to a-part, giving s. + call stdlib_dgeqr2( m, m, tcpy, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_dorm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info ) + + call stdlib_dorm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info ) + + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in bqra21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_dlassq( n1, scpy( n2+1, i ), 1, dscale, dsum ) + end do + bqra21 = dscale*sqrt( dsum ) + ! decide which method to use. + ! weak stability test: + ! f-norm(s21) <= o(eps * f-norm((s))) + if( bqra21<=brqa21 .and. bqra21<=thresha ) then + call stdlib_dlacpy( 'F', m, m, scpy, ldst, s, ldst ) + call stdlib_dlacpy( 'F', m, m, tcpy, ldst, t, ldst ) + call stdlib_dlacpy( 'F', m, m, ircop, ldst, ir, ldst ) + call stdlib_dlacpy( 'F', m, m, licop, ldst, li, ldst ) + else if( brqa21>=thresha ) then + go to 70 + end if + ! set lower triangle of b-part to zero + call stdlib_dlaset( 'LOWER', m-1, m-1, zero, zero, t(2,1), ldst ) + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_dlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_dgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_dgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_dlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_dlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_dgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_dgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_dlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! transformations and set n1-by-n2 (2,1)-block to zero. + call stdlib_dlaset( 'FULL', n1, n2, zero, zero, s(n2+1,1), ldst ) + ! copy back m-by-m diagonal block starting at index j1 of (a, b) + call stdlib_dlacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda ) + call stdlib_dlacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb ) + call stdlib_dlaset( 'FULL', ldst, ldst, zero, zero, t, ldst ) + ! standardize existing 2-by-2 blocks. + call stdlib_dlaset( 'FULL', m, m, zero, zero, work, m ) + work( 1 ) = one + t( 1, 1 ) = one + idum = lwork - m*m - 2 + if( n2>1 ) then + call stdlib_dlagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1 ), & + work( 2 ), t( 1, 1 ), t( 2, 1 ) ) + work( m+1 ) = -work( 2 ) + work( m+2 ) = work( 1 ) + t( n2, n2 ) = t( 1, 1 ) + t( 1, 2 ) = -t( 2, 1 ) + end if + work( m*m ) = one + t( m, m ) = one + if( n1>1 ) then + call stdlib_dlagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, & + work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) ) + + work( m*m ) = work( n2*m+n2+1 ) + work( m*m-1 ) = -work( n2*m+n2+2 ) + t( m, m ) = t( n2+1, n2+1 ) + t( m-1, m ) = -t( m, m-1 ) + end if + call stdlib_dgemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, & + work( m*m+1 ), n2 ) + call stdlib_dlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda ) + call stdlib_dgemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, & + work( m*m+1 ), n2 ) + call stdlib_dlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb ) + call stdlib_dgemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m & + ) + call stdlib_dlacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst ) + call stdlib_dgemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_dlacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda ) + call stdlib_dgemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_dlacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb ) + call stdlib_dgemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m ) + call stdlib_dlacpy( 'FULL', m, m, work, m, ir, ldst ) + ! accumulate transformations into q and z if requested. + if( wantq ) then + call stdlib_dgemm( 'N', 'N', n, m, m, one, q( 1, j1 ), ldq, li,ldst, zero, work, & + n ) + call stdlib_dlacpy( 'FULL', n, m, work, n, q( 1, j1 ), ldq ) + end if + if( wantz ) then + call stdlib_dgemm( 'N', 'N', n, m, m, one, z( 1, j1 ), ldz, ir,ldst, zero, work, & + n ) + call stdlib_dlacpy( 'FULL', n, m, work, n, z( 1, j1 ), ldz ) + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + i = j1 + m + if( i<=n ) then + call stdlib_dgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, & + work, m ) + call stdlib_dlacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda ) + call stdlib_dgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, & + work, m ) + call stdlib_dlacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb ) + end if + i = j1 - 1 + if( i>0 ) then + call stdlib_dgemm( 'N', 'N', i, m, m, one, a( 1, j1 ), lda, ir,ldst, zero, work, & + i ) + call stdlib_dlacpy( 'FULL', i, m, work, i, a( 1, j1 ), lda ) + call stdlib_dgemm( 'N', 'N', i, m, m, one, b( 1, j1 ), ldb, ir,ldst, zero, work, & + i ) + call stdlib_dlacpy( 'FULL', i, m, work, i, b( 1, j1 ), ldb ) + end if + ! exit with info = 0 if swap was successfully performed. + return + end if + ! exit with info = 1 if swap was rejected. + 70 continue + info = 1 + return + end subroutine stdlib_dtgex2 + + !> DTGEXC: reorders the generalized real Schur decomposition of a real + !> matrix pair (A,B) using an orthogonal equivalence transformation + !> (A, B) = Q * (A, B) * Z**T, + !> so that the diagonal block of (A, B) with row index IFST is moved + !> to row ILST. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_dtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldq, ldz, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: here, lwmin, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info==0 ) then + if( n<=1 ) then + lwmin = 1 + else + lwmin = 4*n + 16 + end if + work(1) = lwmin + if (lwork1 ) then + if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, nbf, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2-by-2 block breaks into two 1-by-1 blocks. + if( nbf==2 ) then + if( a( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1-by-1 blocks, each of which + ! must be swapped individually. + nbnext = 1 + if( here>=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, 1, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1-by-1 blocks. + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, & + nbnext, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + else + ! recompute nbnext in case of 2-by-2 split. + if( a( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2-by-2 block did not split. + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,& + 2, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2-by-2 block did split. + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + call stdlib_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + work( 1 ) = lwmin + return + end subroutine stdlib_dtgexc + + !> DTGSEN: reorders the generalized real Schur decomposition of a real + !> matrix pair (A, B) (in terms of an orthonormal equivalence trans- + !> formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the upper quasi-triangular + !> matrix A and the upper triangular B. The leading columns of Q and + !> Z form orthonormal bases of the corresponding left and right eigen- + !> spaces (deflating subspaces). (A, B) must be in generalized real + !> Schur canonical form (as returned by DGGES), i.e. A is block upper + !> triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper + !> triangular. + !> DTGSEN also computes the generalized eigenvalues + !> w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, DTGSEN computes the estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_dtgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, & + beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2 + real(dp) :: dscale, dsum, eps, rdscal, smlnum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + pair = .false. + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + if( pair ) then + pair = .false. + else + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 60 + end if + if( pair )ks = ks + 1 + end if + end if + end do loop_30 + if( wantp ) then + ! solve generalized sylvester equation for r and l + ! and compute pl and pr. + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + call stdlib_dlacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_dlacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + call stdlib_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto left + ! and right eigenspaces. + rdscal = zero + dsum = one + call stdlib_dlassq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_dlassq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates of difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu-estimate. + call stdlib_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl-estimate. + call stdlib_dtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_dlacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_dlacn2( mn2, work( mn2+1 ), work, iwork, dif( 1 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_dtgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_dlacn2( mn2, work( mn2+1 ), work, iwork, dif( 2 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_dtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_dtgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + 60 continue + ! compute generalized eigenvalues of reordered pair (a, b) and + ! normalize the generalized schur form. + pair = .false. + loop_80: do k = 1, n + if( pair ) then + pair = .false. + else + if( k DTGSJA: computes the generalized singular value decomposition (GSVD) + !> of two real upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine DGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), + !> where U, V and Q are orthogonal matrices. + !> R is a nonsingular upper triangular matrix, and D1 and D2 are + !> ``diagonal'' matrices, which are of the following structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the orthogonal transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_dtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(dp), intent(in) :: tola, tolb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + real(dp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(dp), parameter :: hugenum = huge(zero) + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(dp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & + ssmin + ! Intrinsic Functions + intrinsic :: abs,max,min,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + ! change sign if necessary + if( gamma=beta( k+i ) ) then + call stdlib_dscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_dscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l DTGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B) in + !> generalized real Schur canonical form (or of any matrix pair + !> (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where + !> Z**T denotes the transpose of Z. + !> (A, B) must be in generalized real Schur form (as returned by DGGES), + !> i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal + !> blocks. B is upper triangular. + + pure subroutine stdlib_dtgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + real(dp), intent(out) :: dif(*), s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: difdri = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2 + real(dp) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, & + scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi + ! Local Arrays + real(dp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl((a11,b11), (a22, b22)). + n1 = 1 + if( work( 2 )/=zero )n1 = 2 + n2 = n - n1 + if( n2==0 ) then + dif( ks ) = cond + else + i = n*n + 1 + iz = 2*n*n + 1 + call stdlib_dtgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr ) + if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond ) + end if + end if + if( pair )dif( ks+1 ) = dif( ks ) + end if + if( pair )ks = ks + 1 + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_dtgsna + + !> DTPLQT: computes a blocked LQ factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_dtplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_dtplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_dtprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_dtplqt + + !> DTPQRT: computes a blocked QR factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_dtpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_dtpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_dtprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_dtpqrt + + !> DTREVC: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + + pure subroutine stdlib_dtrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, n2 + real(dp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(dp) :: x(2,2) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldtjnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_dlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_dscal( ki, scale, work( 1+n ), 1 ) + work( j+n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_daxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_dlaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_dscal( ki, scale, work( 1+n ), 1 ) + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_daxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_daxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_dcopy( ki, work( 1+n ), 1, vr( 1, is ), 1 ) + ii = stdlib_idamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_dscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else + if( ki>1 )call stdlib_dgemv( 'N', n, ki-1, one, vr, ldvr,work( 1+n ), 1, & + work( ki+n ),vr( 1, ki ), 1 ) + ii = stdlib_idamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_dscal( n, remax, vr( 1, ki ), 1 ) + end if + else + ! complex right eigenvector. + ! initial solve + ! [ (t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i* wi)]*x = 0. + ! [ (t(ki,ki-1) t(ki,ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1+n ) = one + work( ki+n2 ) = wi / t( ki-1, ki ) + else + work( ki-1+n ) = -wi / t( ki, ki-1 ) + work( ki+n2 ) = one + end if + work( ki+n ) = zero + work( ki-1+n2 ) = zero + ! form right-hand side + do k = 1, ki - 2 + work( k+n ) = -work( ki-1+n )*t( k, ki-1 ) + work( k+n2 ) = -work( ki+n2 )*t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! (t(1:ki-2,1:ki-2) - (wr+i*wi))*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_dlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr, wi,x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( ki, scale, work( 1+n ), 1 ) + call stdlib_dscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_daxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_daxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_dlaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, wi, x, 2, scale,xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( ki, scale, work( 1+n ), 1 ) + call stdlib_dscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + work( j-1+n2 ) = x( 1, 2 ) + work( j+n2 ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_daxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_daxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_daxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+n2 ), 1 ) + + call stdlib_daxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_dcopy( ki, work( 1+n ), 1, vr( 1, is-1 ), 1 ) + call stdlib_dcopy( ki, work( 1+n2 ), 1, vr( 1, is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_dscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_dscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else + if( ki>2 ) then + call stdlib_dgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n ), 1, work( ki-& + 1+n ),vr( 1, ki-1 ), 1 ) + call stdlib_dgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n2 ), 1, work( & + ki+n2 ),vr( 1, ki ), 1 ) + else + call stdlib_dscal( n, work( ki-1+n ), vr( 1, ki-1 ), 1 ) + call stdlib_dscal( n, work( ki+n2 ), vr( 1, ki ), 1 ) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_dscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_dscal( n, remax, vr( 1, ki ), 1 ) + end if + end if + is = is - 1 + if( ip/=0 )is = is - 1 + 130 continue + if( ip==1 )ip = 0 + if( ip==-1 )ip = 1 + end do loop_140 + end if + if( leftv ) then + ! compute left eigenvectors. + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==-1 )go to 250 + if( ki==n )go to 150 + if( t( ki+1, ki )==zero )go to 150 + ip = 1 + 150 continue + if( somev ) then + if( .not.select( ki ) )go to 250 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! real left eigenvector. + work( ki+n ) = one + ! form right-hand side + do k = ki + 1, n + work( k+n ) = -t( ki, k ) + end do + ! solve the quasi-triangular system: + ! (t(ki+1:n,ki+1:n) - wr)**t*x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_ddot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + ! solve (t(j,j)-wr)**t*x = work + call stdlib_dlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_dscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + vmax = max( abs( work( j+n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_ddot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_ddot( j-ki-1, t( ki+1, j+1 ), 1,& + work( ki+1+n ), 1 ) + ! solve + ! [t(j,j)-wr t(j,j+1) ]**t * x = scale*( work1 ) + ! [t(j+1,j) t(j+1,j+1)-wr] ( work2 ) + call stdlib_dlaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_dscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + work( j+1+n ) = x( 2, 1 ) + vmax = max( abs( work( j+n ) ),abs( work( j+1+n ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_dcopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + ii = stdlib_idamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_dscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki+n ) = wi / t( ki, ki+1 ) + work( ki+1+n2 ) = one + else + work( ki+n ) = one + work( ki+1+n2 ) = -wi / t( ki+1, ki ) + end if + work( ki+1+n ) = zero + work( ki+n2 ) = zero + ! form right-hand side + do k = ki + 2, n + work( k+n ) = -work( ki+n )*t( ki, k ) + work( k+n2 ) = -work( ki+1+n2 )*t( ki+1, k ) + end do + ! solve complex quasi-triangular system: + ! ( t(ki+2,n:ki+2,n) - (wr-i*wi) )*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_dscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_ddot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_ddot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + ! solve (t(j,j)-(wr-i*wi))*(x11+i*x12)= wk+i*wk2 + call stdlib_dlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_dscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + vmax = max( abs( work( j+n ) ),abs( work( j+n2 ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_dscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_ddot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_ddot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_ddot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n ), 1 ) + work( j+1+n2 ) = work( j+1+n2 ) -stdlib_ddot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n2 ), 1 ) + ! solve 2-by-2 complex linear equation + ! ([t(j,j) t(j,j+1) ]**t-(wr-i*wi)*i)*x = scale*b + ! ([t(j+1,j) t(j+1,j+1)] ) + call stdlib_dlaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_dscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + work( j+1+n ) = x( 2, 1 ) + work( j+1+n2 ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_dcopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + call stdlib_dcopy( n-ki+1, work( ki+n2 ), 1, vl( ki, is+1 ),1 ) + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_dscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_dscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else + if( ki DTREVC3: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**T)*T = w*(y**T) + !> where y**T denotes the transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_dtrevc3( side, howmny, select, n, t, ldt, vl, ldvl,vr, ldvr, mm, m, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(dp), intent(in) :: t(ldt,*) + real(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, iv, maxwrk, nb, & + ki2 + real(dp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(dp) :: x(2,2) + integer(ilp) :: iscomplex(nbmax) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + nb = stdlib_ilaenv( 1, 'DTREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + lquery = ( lwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_dlaset( 'F', n, 1+2*nb, zero, zero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_dlabad( unfl, ovfl ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + bignum = ( one-ulp ) / smlnum + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = zero + do i = 1, j - 1 + work( j ) = work( j ) + abs( t( i, j ) ) + end do + end do + ! index ip is used to specify the real or complex eigenvalue: + ! ip = 0, real eigenvalue, + ! 1, first of conjugate complex pair: (wr,wi) + ! -1, second of conjugate complex pair: (wr,wi) + ! iscomplex array stores ip for each column in current block. + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! for complex right vector, uses iv-1 for real part and iv for complex part. + ! non-blocked version always uses iv=2; + ! blocked version starts with iv=nb, goes down to 1 or 2. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 2 + if( nb>2 ) then + iv = nb + end if + ip = 0 + is = m + loop_140: do ki = n, 1, -1 + if( ip==-1 ) then + ! previous iteration (ki+1) was second of conjugate pair, + ! so this ki is first of conjugate pair; skip to end of loop + ip = 1 + cycle loop_140 + else if( ki==1 ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki, ki-1 )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is second of conjugate pair + ip = -1 + end if + if( somev ) then + if( ip==0 ) then + if( .not.select( ki ) )cycle loop_140 + else + if( .not.select( ki-1 ) )cycle loop_140 + end if + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki-1 ) ) )*sqrt( abs( t( ki-1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real right eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-1,1:ki-1) - wr ]*x = scale*work. + jnxt = ki - 1 + loop_60: do j = ki - 1, 1, -1 + if( j>jnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_dlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_dscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_daxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_dlaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+iv*n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_dscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j-1+iv*n ) = x( 1, 1 ) + work( j +iv*n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_daxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+iv*n ), 1 ) + + call stdlib_daxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_dcopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_idamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_dscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_dgemv( 'N', n, ki-1, one, vr, ldvr,work( 1 + iv*n ), & + 1, work( ki + iv*n ),vr( 1, ki ), 1 ) + ii = stdlib_idamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_dscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = zero + end do + iscomplex( iv ) = ip + ! back-transform and normalization is done below + end if + else + ! -------------------------------------------------------- + ! complex right eigenvector. + ! initial solve + ! [ ( t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i*wi) ]*x = 0. + ! [ ( t(ki, ki-1) t(ki, ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1 + (iv-1)*n ) = one + work( ki + (iv )*n ) = wi / t( ki-1, ki ) + else + work( ki-1 + (iv-1)*n ) = -wi / t( ki, ki-1 ) + work( ki + (iv )*n ) = one + end if + work( ki + (iv-1)*n ) = zero + work( ki-1 + (iv )*n ) = zero + ! form right-hand side. + do k = 1, ki - 2 + work( k+(iv-1)*n ) = -work( ki-1+(iv-1)*n )*t(k,ki-1) + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(k,ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-2,1:ki-2) - (wr+i*wi) ]*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_dlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+(iv-1)*n ), n,wr, wi, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_dscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j+(iv-1)*n ) = x( 1, 1 ) + work( j+(iv )*n ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_daxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), 1 ) + + call stdlib_daxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_dlaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+(iv-1)*n ), n, wr, wi, x, 2,scale, xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_dscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j-1+(iv-1)*n ) = x( 1, 1 ) + work( j +(iv-1)*n ) = x( 2, 1 ) + work( j-1+(iv )*n ) = x( 1, 2 ) + work( j +(iv )*n ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_daxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+(iv-1)*n ),& + 1 ) + call stdlib_daxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), & + 1 ) + call stdlib_daxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+(iv )*n ), & + 1 ) + call stdlib_daxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_dcopy( ki, work( 1+(iv-1)*n ), 1, vr(1,is-1), 1 ) + call stdlib_dcopy( ki, work( 1+(iv )*n ), 1, vr(1,is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_dscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_dscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>2 ) then + call stdlib_dgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv-1)*n ), & + 1,work( ki-1 + (iv-1)*n ), vr(1,ki-1), 1) + call stdlib_dgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv)*n ), 1,& + work( ki + (iv)*n ), vr( 1, ki ), 1 ) + else + call stdlib_dscal( n, work(ki-1+(iv-1)*n), vr(1,ki-1), 1) + call stdlib_dscal( n, work(ki +(iv )*n), vr(1,ki ), 1) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_dscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_dscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + (iv-1)*n ) = zero + work( k + (iv )*n ) = zero + end do + iscomplex( iv-1 ) = -ip + iscomplex( iv ) = ip + iv = iv - 1 + ! back-transform and normalization is done below + end if + end if + if( nb>1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki-1 and ki) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki - 1 + end if + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv<=2) .or. (ki2==1) ) then + call stdlib_dgemm( 'N', 'N', n, nb-iv+1, ki2+nb-iv, one,vr, ldvr,work( 1 + & + (iv)*n ), n,zero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + if( iscomplex(k)==0 ) then + ! real eigenvector + ii = stdlib_idamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1 ) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_dscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_dlacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki2 ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if ! blocked back-transform + is = is - 1 + if( ip/=0 )is = is - 1 + end do loop_140 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! for complex left vector, uses iv for real part and iv+1 for complex part. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb-1 or nb. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 1 + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==1 ) then + ! previous iteration (ki-1) was first of conjugate pair, + ! so this ki is second of conjugate pair; skip to end of loop + ip = -1 + cycle loop_260 + else if( ki==n ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki+1, ki )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is first of conjugate pair + ip = 1 + end if + if( somev ) then + if( .not.select( ki ) )cycle loop_260 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real left eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -t( ki, k ) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+1:n,ki+1:n) - wr ]**t * x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_ddot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + ! solve [ t(j,j) - wr ]**t * x = work + call stdlib_dlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_dscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + vmax = max( abs( work( j+iv*n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_ddot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + work( j+1+iv*n ) = work( j+1+iv*n ) -stdlib_ddot( j-ki-1, t( ki+1, j+1 )& + , 1,work( ki+1+iv*n ), 1 ) + ! solve + ! [ t(j,j)-wr t(j,j+1) ]**t * x = scale*( work1 ) + ! [ t(j+1,j) t(j+1,j+1)-wr ] ( work2 ) + call stdlib_dlaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_dscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j +iv*n ) = x( 1, 1 ) + work( j+1+iv*n ) = x( 2, 1 ) + vmax = max( abs( work( j +iv*n ) ),abs( work( j+1+iv*n ) ), vmax ) + + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_dcopy( n-ki+1, work( ki + iv*n ), 1,vl( ki, is ), 1 ) + ii = stdlib_idamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_dscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki + (iv )*n ) = wi / t( ki, ki+1 ) + work( ki+1 + (iv+1)*n ) = one + else + work( ki + (iv )*n ) = one + work( ki+1 + (iv+1)*n ) = -wi / t( ki+1, ki ) + end if + work( ki+1 + (iv )*n ) = zero + work( ki + (iv+1)*n ) = zero + ! form right-hand side. + do k = ki + 2, n + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(ki, k) + work( k+(iv+1)*n ) = -work( ki+1+(iv+1)*n )*t(ki+1,k) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+2:n,ki+2:n)**t - (wr-i*wi) ]*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_dscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j+(iv )*n ) = work( j+(iv)*n ) -stdlib_ddot( j-ki-2, t( ki+2, j )& + , 1,work( ki+2+(iv)*n ), 1 ) + work( j+(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_ddot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve [ t(j,j)-(wr-i*wi) ]*(x11+i*x12)= wk+i*wk2 + call stdlib_dlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_dscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j+(iv )*n ) = x( 1, 1 ) + work( j+(iv+1)*n ) = x( 1, 2 ) + vmax = max( abs( work( j+(iv )*n ) ),abs( work( j+(iv+1)*n ) ), vmax ) + + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_dscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_dscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j +(iv )*n ) = work( j+(iv)*n ) -stdlib_ddot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv)*n ), 1 ) + work( j +(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_ddot( j-ki-2, t( ki+2,& + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + work( j+1+(iv )*n ) = work( j+1+(iv)*n ) -stdlib_ddot( j-ki-2, t( ki+2,& + j+1 ), 1,work( ki+2+(iv)*n ), 1 ) + work( j+1+(iv+1)*n ) = work( j+1+(iv+1)*n ) -stdlib_ddot( j-ki-2, t( ki+& + 2, j+1 ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve 2-by-2 complex linear equation + ! [ (t(j,j) t(j,j+1) )**t - (wr-i*wi)*i ]*x = scale*b + ! [ (t(j+1,j) t(j+1,j+1)) ] + call stdlib_dlaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_dscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_dscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j +(iv )*n ) = x( 1, 1 ) + work( j +(iv+1)*n ) = x( 1, 2 ) + work( j+1+(iv )*n ) = x( 2, 1 ) + work( j+1+(iv+1)*n ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ),vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_dcopy( n-ki+1, work( ki + (iv )*n ), 1,vl( ki, is ), 1 ) + + call stdlib_dcopy( n-ki+1, work( ki + (iv+1)*n ), 1,vl( ki, is+1 ), 1 ) + + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_dscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_dscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki and ki+1) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki + 1 + end if + ! columns 1:iv of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv>=nb-1) .or. (ki2==n) ) then + call stdlib_dgemm( 'N', 'N', n, iv, n-ki2+iv, one,vl( 1, ki2-iv+1 ), ldvl,& + work( ki2-iv+1 + (1)*n ), n,zero,work( 1 + (nb+1)*n ), n ) + ! normalize vectors + do k = 1, iv + if( iscomplex(k)==0) then + ! real eigenvector + ii = stdlib_idamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_dscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_dlacpy( 'F', n, iv,work( 1 + (nb+1)*n ), n,vl( 1, ki2-iv+1 ), & + ldvl ) + iv = 1 + else + iv = iv + 1 + end if + end if ! blocked back-transform + is = is + 1 + if( ip/=0 )is = is + 1 + end do loop_260 + end if + return + end subroutine stdlib_dtrevc3 + + !> DTRSYL: solves the real Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**T, and A and B are both upper quasi- + !> triangular. A is M-by-M and B is N-by-N; the right hand side C and + !> the solution X are M-by-N; and scale is an output scale factor, set + !> <= 1 to avoid overflow in X. + !> A and B must be in Schur canonical form (as returned by DHSEQR), that + !> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; + !> each 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_dtrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(in) :: a(lda,*), b(ldb,*) + real(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: ierr, j, k, k1, k2, knext, l, l1, l2, lnext + real(dp) :: a11, bignum, da11, db, eps, scaloc, sgn, smin, smlnum, suml, sumr, & + xnorm + ! Local Arrays + real(dp) :: dum(1), vec(2,2), x(2,2) + ! Intrinsic Functions + intrinsic :: abs,real,max,min + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'T' ) .and. .not.stdlib_lsame( trana, & + 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'T' ) .and. .not.stdlib_lsame( & + tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaknext )cycle loop_50 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_dlaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_dlaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_dlasy2( .false., .false., isgn, 2, 2,a( k1, k1 ), lda, b( l1, & + l1 ), ldb, vec,2, scaloc, x, 2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_50 + end do loop_60 + else if( .not.notrna .and. notrnb ) then + ! solve a**t *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 t l-1 + ! r(k,l) = sum [a(i,k)**t*x(i,l)] +isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = 1 + loop_120: do l = 1, n + if( lone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_dlaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_dlaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_dlasy2( .true., .false., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_110 + end do loop_120 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**t*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! top-right corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! k-1 n + ! r(k,l) = sum [a(i,k)**t*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_180: do l = n, 1, -1 + if( l>lnext )cycle loop_180 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = 1 + loop_170: do k = 1, m + if( kone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_dlaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_dlaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_dlasy2( .true., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, l1 & + ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_170 + end do loop_180 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! bottom-right corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=k+1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_240: do l = n, 1, -1 + if( l>lnext )cycle loop_240 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = m + loop_230: do k = m, 1, -1 + if( k>knext )cycle loop_230 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l1, c( k1, min( l1+1, n ) ), ldc,b( l1, min( l1+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_dlaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_ddot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_dlaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_ddot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_ddot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_dlasy2( .false., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_dscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_230 + end do loop_240 + end if + return + end subroutine stdlib_dtrsyl + + !> DGEBRD: reduces a general real M-by-N matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_dgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'DGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=dp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+nb-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_dlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update + ! of the form a := a - v*y**t - x*u**t + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& + nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) + call stdlib_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_dgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_dgebrd + + !> DGEHRD: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_dgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + real(dp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin + tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**t + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_dlahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**t. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = one + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE',ihi, ihi-i-ib+1,ib, -one, work, & + ldwork, a( i+ib, i ), lda, one,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE','UNIT', i, ib-1,one, a( i+1, i )& + , lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_daxpy( i, -one, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_dlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, n-i-ib+1, & + ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_dgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_dgehrd + + !> DGELQT: computes a blocked LQ factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_dgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. ( mb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda DGELS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, or its transpose, using a QR or LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an underdetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_dgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(dp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(dp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'DORMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'DORMQR', 'LT', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'DORMLQ', 'LT', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'DORMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn+max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=dp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_dlaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'P' ) + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_dlange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_dlange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_dgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_dormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_dtrtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_dormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_dgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_dtrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_dormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_dormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_dlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_dlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=dp) + return + end subroutine stdlib_dgels + + !> DGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by short wide LQ + !> factorization (DGELQ) + + pure subroutine stdlib_dgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(dp), intent(in) :: a(lda,*), t(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_dgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_dlamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_dgemlq + + !> DGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (DGEQR) + + pure subroutine stdlib_dgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(dp), intent(in) :: a(lda,*), t(*) + real(dp), intent(inout) :: c(ldc,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_dgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_dgemqr + + !> DGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_dgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_dgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_dgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb DGEQRT: computes a blocked QR factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_dgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda DGESV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_dgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda DGESVJ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + !> DGESVJ can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_dgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) + real(dp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + ! Local Scalars + real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & + temp1, theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Local Arrays + real(dp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = work( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=dp) ) + else + ctol = real( m,KIND=dp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_dlamch() works properly on the target machine.) + epsln = stdlib_dlamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_dlamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + big = stdlib_dlamch( 'OVERFLOW' ) + ! big = one / sfmin + rootbig = one / rootsfmin + large = big / sqrt( real( m*n,KIND=dp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=dp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_dlaset( 'A', mvl, n, zero, one, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl= one / sqrt( real( m,KIND=dp)*real( n,KIND=dp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_dlassq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_dlassq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_dlassq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl= one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_dlaset( 'G', m, n, zero, one, a, lda ) + work( 1 ) = one + work( 2 ) = zero + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_dlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + work( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + work( 2 ) = one + else + work( 2 ) = zero + end if + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=dp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=dp) ) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=dp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_dlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl= temp1*skl + if( skl/=one ) then + call stdlib_dlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl= one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + fastr( 1 ) = zero + ! a is represented in factored form a = a * diag(work), where diag(work) + ! is initialized to identity. work is updated during fast scaled + ! rotations. + do q = 1, n + work( q ) = one + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_dgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_dgesvj. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_dgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, work( n+1 ), & + lwork-n, ierr ) + call stdlib_dgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,work( n+1 ), lwork-n, & + ierr ) + call stdlib_dgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_dgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_dgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, work( n+1 ), lwork-n,ierr ) + call stdlib_dgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& + tol, 1, work( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_dgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, work( n+1 ), lwork-n,ierr ) + call stdlib_dgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, work( n+1 ), lwork-n,ierr ) + + call stdlib_dgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, work( n+1 ),lwork-n, ierr ) + call stdlib_dgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& + mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_dswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_dswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! caveat: + ! unfortunately, some blas implementations compute stdlib_dnrm2(m,a(1,p),1) + ! as sqrt(stdlib_ddot(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_dnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_dnrm2 is available, the if-then-else + ! below should read "aapp = stdlib_dnrm2( m, a(1,p), 1 ) * work(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_dnrm2( m, a( 1, p ), 1 )*work( p ) + else + temp1 = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*work( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_dcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_ddot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_dcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_ddot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq)/aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( work( q )>=one ) then + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_daxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( work( p )>=work( q ) )then + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_daxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_dcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp, one, m,1, work( n+1 ), & + lda,ierr ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_daxpy( m, temp1, work( n+1 ), 1,a( 1, q ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dnrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_dlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dnrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_dcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_dlascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_ddot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_ddot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_dcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_dlascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_ddot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq)/aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_drotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_drotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + end if + else + if( work( q )>=one ) then + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_daxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_daxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + else + if( work( p )>=work( q ) )then + call stdlib_daxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_daxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_daxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_daxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_daxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_daxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_daxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_daxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_dcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_daxpy( m, temp1, work( n+1 ),1, a( 1, q ), 1 & + ) + call stdlib_dlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_dcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + + call stdlib_dlascl( 'G', 0, 0, aaqq, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_dlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*work( q ) / work( p ) + call stdlib_daxpy( m, temp1, work( n+1 ),1, a( 1, p ), 1 & + ) + call stdlib_dlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dnrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_dlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dnrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_dnrm2( m, a( 1, n ), 1 )*work( n ) + else + t = zero + aapp = one + call stdlib_dlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*work( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + call stdlib_dswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_dswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n2 + call stdlib_dscal( m, work( p ) / sva( p ), a( 1, p ), 1 ) + end do + end if + ! scale the product of jacobi rotations (assemble the fast rotations). + if( rsvec ) then + if( applv ) then + do p = 1, n + call stdlib_dscal( mvl, work( p ), v( 1, p ), 1 ) + end do + else + do p = 1, n + temp1 = one / stdlib_dnrm2( mvl, v( 1, p ), 1 ) + call stdlib_dscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl) ) ).or. ( ( skl( sfmin / skl) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl= one + end if + work( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + work( 2 ) = real( n4,KIND=dp) + ! n4 is the number of computed nonzero singular values of a. + work( 3 ) = real( n2,KIND=dp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 DGESVX: uses the LU factorization to compute the solution to a real + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_dgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*), c(*), r(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(dp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_dlantr( 'M', 'U', 'N', info, info, af, ldaf,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_dlange( 'M', n, info, a, lda, work ) / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_dlange( norm, n, n, a, lda, work ) + rpvgrw = stdlib_dlantr( 'M', 'U', 'N', n, n, af, ldaf, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_dlange( 'M', n, n, a, lda, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_dgecon( norm, n, af, ldaf, anorm, rcond, work, iwork, info ) + ! compute the solution matrix x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_dgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + work( 1 ) = rpvgrw + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DGGES: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> DGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_dgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alphar, & + alphai, beta, vsl, ldvsl, vsr,ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_d) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, maxwrk, minwrk + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( lda0 )then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'DGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORGQR', ' ', n, 1, n, & + -1 ) ) + end if + else + minwrk = 1 + maxwrk = 1 + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n space for storing balancing factors) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_dggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_dorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_dlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_dgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_dhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 50 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: need 4*n+16 ) + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_dtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_dggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_dggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_dlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_dlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 50 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_dgges + + !> DGGESX: computes for a pair of N-by-N real nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the real Schur form (S,T), and, + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_dggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, & + alphar, alphai, beta, vsl, ldvsl,vsr, ldvsr, rconde, rcondv, work, lwork, iwork,liwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), rconde(2), rcondv(2), vsl(& + ldvsl,*), vsr(ldvsr,*), work(*) + ! Function Arguments + procedure(stdlib_selctg_d) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, wantsb, & + wantse, wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, ip, iright, & + irows, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, safmax, safmin, & + smlnum + ! Local Arrays + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'DGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORGQR', ' ', n, 1, n, & + -1 ) ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 6 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n for permutation parameters) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_dggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_dorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_dlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_dgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_dhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 60 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + ! (workspace: if ijob >= 1, need max( 8*(n+1), 2*sdim*(n-sdim) ) + ! otherwise, need 8*(n+1) ) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + call stdlib_dtgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,sdim, pl, pr, dif, work( iwrk ), lwork-iwrk+1,iwork, & + liwork, ierr ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-22 ) then + ! not enough real workspace + info = -22 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_dggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_dggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_dlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_dlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 60 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_dggesx + + !> DGGEV: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_dggev( jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai,beta, vl, ldvl, vr, & + ldvr, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, maxwrk, minwrk + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (workspace: need 6*n) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_dggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_dorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_dlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_dgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_dgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_dhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + ! (workspace: need 6*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_dtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_dggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) DGGEVX: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_dggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, ilo,ihi, lscale, rscale, abnrm, bbnrm, rconde,rcondv, work, lwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(dp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), lscale(*), rconde(*), rcondv(*)& + , rscale(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, pair, wantsb, wantse, & + wantsn, wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk, mm + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc,'S' ) .or. & + stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) )then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_dggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,work, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_dlange( '1', n, n, a, lda, work( 1 ) ) + if( ilascl ) then + work( 1 ) = abnrm + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, 1, 1, work( 1 ), 1,ierr ) + abnrm = work( 1 ) + end if + bbnrm = stdlib_dlange( '1', n, n, b, ldb, work( 1 ) ) + if( ilbscl ) then + work( 1 ) = bbnrm + call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, work( 1 ), 1,ierr ) + bbnrm = work( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to a + ! (workspace: need n, prefer n*nb) + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_dorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_dlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_dgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_dgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_dhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, work,lwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 130 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! (workspace: stdlib_dtgevc: need 6*n + ! stdlib_dtgsna: need 2*n*(n+2)+16 if sense = 'v' or 'b', + ! need n otherwise ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_dtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 130 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_dtgevc) and estimate condition + ! numbers (stdlib_dtgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to recalculate + ! eigenvectors and estimate one condition numbers at a time. + pair = .false. + loop_20: do i = 1, n + if( pair ) then + pair = .false. + cycle loop_20 + end if + mm = 1 + if( i DGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_dggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + real(dp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_dcopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = zero + end do + ! update d1 = d1 - t12*y2 + call stdlib_dgemv( 'NO TRANSPOSE', m, n-m, -one, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1, & + one, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_dcopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**t *y + call stdlib_dormrq( 'LEFT', 'TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), ldb, work( & + m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_dggglm + + !> DGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_dgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + real(dp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_dcopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_dgemv( 'NO TRANSPOSE', n-p, p, -one, a( 1, n-p+1 ), lda,d, 1, one, c, 1 & + ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_dcopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_dgemv( 'NO TRANSPOSE', nr, n-m, -one, a( n-p+1, m+1 ),lda, d( & + nr+1 ), 1, one, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_dtrmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_daxpy( nr, -one, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**t*x + call stdlib_dormrq( 'LEFT', 'TRANSPOSE', n, 1, p, b, ldb, work( 1 ), x,n, work( p+mn+1 & + ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_dgglse + + !> DHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a real upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_dhsein( side, eigsrc, initv, select, n, h, ldh, wr, wi,vl, ldvl, vr, ldvr, & + mm, m, work, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(dp), intent(in) :: h(ldh,*), wi(*) + real(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), wr(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, pair, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ksi, ksr, ldwork + real(dp) :: bignum, eps3, hnorm, smlnum, ulp, unfl, wki, wkr + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors, and standardize the array select. + m = 0 + pair = .false. + do k = 1, n + if( pair ) then + pair = .false. + select( k ) = .false. + else + if( wi( k )==zero ) then + if( select( k ) )m = m + 1 + else + pair = .true. + if( select( k ) .or. select( k+1 ) ) then + select( k ) = .true. + m = m + 2 + end if + end if + end if + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==zero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_dlanhs( 'I', kr-kl+1, h( kl, kl ), ldh, work ) + if( stdlib_disnan( hnorm ) ) then + info = -6 + return + else if( hnorm>zero ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wkr = wr( k ) + wki = wi( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. abs( wr( i )-wkr )+abs( wi( i )-wki )0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifaill( ksr ) = k + ifaill( ksi ) = k + else + ifaill( ksr ) = 0 + ifaill( ksi ) = 0 + end if + do i = 1, kl - 1 + vl( i, ksr ) = zero + end do + if( pair ) then + do i = 1, kl - 1 + vl( i, ksi ) = zero + end do + end if + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_dlaein( .true., noinit, kr, h, ldh, wkr, wki,vr( 1, ksr ), vr( 1, & + ksi ), work, ldwork,work( n*n+n+1 ), eps3, smlnum, bignum,iinfo ) + if( iinfo>0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifailr( ksr ) = k + ifailr( ksi ) = k + else + ifailr( ksr ) = 0 + ifailr( ksi ) = 0 + end if + do i = kr + 1, n + vr( i, ksr ) = zero + end do + if( pair ) then + do i = kr + 1, n + vr( i, ksi ) = zero + end do + end if + end if + if( pair ) then + ksr = ksr + 2 + else + ksr = ksr + 1 + end if + end if + end do loop_120 + return + end subroutine stdlib_dhsein + + !> DLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(dp) function stdlib_dla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + real(dp), intent(in) :: a(lda,*), af(ldaf,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: amax, umax, rpvgrw + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_dpotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_dla_porpvgrw = rpvgrw + end function stdlib_dla_porpvgrw + + !> DLAED3: finds the roots of the secular equation, as defined by the + !> values in D, W, and RHO, between 1 and K. It makes the + !> appropriate calls to DLAED4 and then updates the eigenvectors by + !> multiplying the matrix of eigenvectors of the pair of eigensystems + !> being combined by the matrix of eigenvectors of the K-by-K system + !> which is solved here. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_dlaed3( k, n, n1, d, q, ldq, rho, dlamda, q2, indx,ctot, w, s, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, n, n1 + real(dp), intent(in) :: rho + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), indx(*) + real(dp), intent(out) :: d(*), q(ldq,*), s(*) + real(dp), intent(inout) :: dlamda(*), w(*) + real(dp), intent(in) :: q2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, iq2, j, n12, n2, n23 + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( n DLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense symmetric matrix + !> that has been reduced to tridiagonal form. DLAED1 handles + !> the case in which all eigenvalues and eigenvectors of a symmetric + !> tridiagonal matrix are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**Tu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED8. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by DLAED9). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_dlaed7( icompq, n, qsiz, tlvls, curlvl, curpbm, d, q,ldq, indxq, rho, & + cutpnt, qstore, qptr, prmptr,perm, givptr, givcol, givnum, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, icompq, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(dp), intent(inout) :: d(*), givnum(2,*), q(ldq,*), qstore(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, ldq2, & + n1, n2, ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( icompq==1 .and. qsizcutpnt .or. n DLAEXC: swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + !> an upper quasi-triangular matrix T by an orthogonal similarity + !> transformation. + !> T must be in Schur canonical form, that is, block upper triangular + !> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + !> has its diagonal elements equal and its off-diagonal elements of + !> opposite sign. + + subroutine stdlib_dlaexc( wantq, n, t, ldt, q, ldq, j1, n1, n2, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, ldq, ldt, n, n1, n2 + ! Array Arguments + real(dp), intent(inout) :: q(ldq,*), t(ldt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldd = 4 + integer(ilp), parameter :: ldx = 2 + + + + ! Local Scalars + integer(ilp) :: ierr, j2, j3, j4, k, nd + real(dp) :: cs, dnorm, eps, scale, smlnum, sn, t11, t22, t33, tau, tau1, tau2, temp, & + thresh, wi1, wi2, wr1, wr2, xnorm + ! Local Arrays + real(dp) :: d(ldd,4), u(3), u1(3), u2(3), x(ldx,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 .or. n1==0 .or. n2==0 )return + if( j1+n1>n )return + j2 = j1 + 1 + j3 = j1 + 2 + j4 = j1 + 3 + if( n1==1 .and. n2==1 ) then + ! swap two 1-by-1 blocks. + t11 = t( j1, j1 ) + t22 = t( j2, j2 ) + ! determine the transformation to perform the interchange. + call stdlib_dlartg( t( j1, j2 ), t22-t11, cs, sn, temp ) + ! apply transformation to the matrix t. + if( j3<=n )call stdlib_drot( n-j1-1, t( j1, j3 ), ldt, t( j2, j3 ), ldt, cs,sn ) + + call stdlib_drot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + t( j1, j1 ) = t22 + t( j2, j2 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_drot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + else + ! swapping involves at least one 2-by-2 block. + ! copy the diagonal block of order n1+n2 to the local array d + ! and compute its norm. + nd = n1 + n2 + call stdlib_dlacpy( 'FULL', nd, nd, t( j1, j1 ), ldt, d, ldd ) + dnorm = stdlib_dlange( 'MAX', nd, nd, d, ldd, work ) + ! compute machine-dependent threshold for test for accepting + ! swap. + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + thresh = max( ten*eps*dnorm, smlnum ) + ! solve t11*x - x*t22 = scale*t12 for x. + call stdlib_dlasy2( .false., .false., -1, n1, n2, d, ldd,d( n1+1, n1+1 ), ldd, d( 1,& + n1+1 ), ldd, scale, x,ldx, xnorm, ierr ) + ! swap the adjacent diagonal blocks. + k = n1 + n1 + n2 - 3 + go to ( 10, 20, 30 )k + 10 continue + ! n1 = 1, n2 = 2: generate elementary reflector h so that: + ! ( scale, x11, x12 ) h = ( 0, 0, * ) + u( 1 ) = scale + u( 2 ) = x( 1, 1 ) + u( 3 ) = x( 1, 2 ) + call stdlib_dlarfg( 3, u( 3 ), u, 1, tau ) + u( 3 ) = one + t11 = t( j1, j1 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_dlarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_dlarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 3,3 )-t11 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_dlarfx( 'L', 3, n-j1+1, u, tau, t( j1, j1 ), ldt, work ) + call stdlib_dlarfx( 'R', j2, 3, u, tau, t( 1, j1 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j3, j3 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_dlarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 20 continue + ! n1 = 2, n2 = 1: generate elementary reflector h so that: + ! h ( -x11 ) = ( * ) + ! ( -x21 ) = ( 0 ) + ! ( scale ) = ( 0 ) + u( 1 ) = -x( 1, 1 ) + u( 2 ) = -x( 2, 1 ) + u( 3 ) = scale + call stdlib_dlarfg( 3, u( 1 ), u( 2 ), 1, tau ) + u( 1 ) = one + t33 = t( j3, j3 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_dlarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_dlarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 2, 1 ) ), abs( d( 3, 1 ) ), abs( d( 1,1 )-t33 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_dlarfx( 'R', j3, 3, u, tau, t( 1, j1 ), ldt, work ) + call stdlib_dlarfx( 'L', 3, n-j1, u, tau, t( j1, j2 ), ldt, work ) + t( j1, j1 ) = t33 + t( j2, j1 ) = zero + t( j3, j1 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_dlarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 30 continue + ! n1 = 2, n2 = 2: generate elementary reflectors h(1) and h(2) so + ! that: + ! h(2) h(1) ( -x11 -x12 ) = ( * * ) + ! ( -x21 -x22 ) ( 0 * ) + ! ( scale 0 ) ( 0 0 ) + ! ( 0 scale ) ( 0 0 ) + u1( 1 ) = -x( 1, 1 ) + u1( 2 ) = -x( 2, 1 ) + u1( 3 ) = scale + call stdlib_dlarfg( 3, u1( 1 ), u1( 2 ), 1, tau1 ) + u1( 1 ) = one + temp = -tau1*( x( 1, 2 )+u1( 2 )*x( 2, 2 ) ) + u2( 1 ) = -temp*u1( 2 ) - x( 2, 2 ) + u2( 2 ) = -temp*u1( 3 ) + u2( 3 ) = scale + call stdlib_dlarfg( 3, u2( 1 ), u2( 2 ), 1, tau2 ) + u2( 1 ) = one + ! perform swap provisionally on diagonal block in d. + call stdlib_dlarfx( 'L', 3, 4, u1, tau1, d, ldd, work ) + call stdlib_dlarfx( 'R', 4, 3, u1, tau1, d, ldd, work ) + call stdlib_dlarfx( 'L', 3, 4, u2, tau2, d( 2, 1 ), ldd, work ) + call stdlib_dlarfx( 'R', 4, 3, u2, tau2, d( 1, 2 ), ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 4, 1 ) ),abs( d( 4, 2 ) ) )& + >thresh )go to 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_dlarfx( 'L', 3, n-j1+1, u1, tau1, t( j1, j1 ), ldt, work ) + call stdlib_dlarfx( 'R', j4, 3, u1, tau1, t( 1, j1 ), ldt, work ) + call stdlib_dlarfx( 'L', 3, n-j1+1, u2, tau2, t( j2, j1 ), ldt, work ) + call stdlib_dlarfx( 'R', j4, 3, u2, tau2, t( 1, j2 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j4, j1 ) = zero + t( j4, j2 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_dlarfx( 'R', n, 3, u1, tau1, q( 1, j1 ), ldq, work ) + call stdlib_dlarfx( 'R', n, 3, u2, tau2, q( 1, j2 ), ldq, work ) + end if + 40 continue + if( n2==2 ) then + ! standardize new 2-by-2 block t11 + call stdlib_dlanv2( t( j1, j1 ), t( j1, j2 ), t( j2, j1 ),t( j2, j2 ), wr1, wi1, & + wr2, wi2, cs, sn ) + call stdlib_drot( n-j1-1, t( j1, j1+2 ), ldt, t( j2, j1+2 ), ldt,cs, sn ) + call stdlib_drot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + if( wantq )call stdlib_drot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + if( n1==2 ) then + ! standardize new 2-by-2 block t22 + j3 = j1 + n2 + j4 = j3 + 1 + call stdlib_dlanv2( t( j3, j3 ), t( j3, j4 ), t( j4, j3 ),t( j4, j4 ), wr1, wi1, & + wr2, wi2, cs, sn ) + if( j3+2<=n )call stdlib_drot( n-j3-1, t( j3, j3+2 ), ldt, t( j4, j3+2 ),ldt, cs,& + sn ) + call stdlib_drot( j3-1, t( 1, j3 ), 1, t( 1, j4 ), 1, cs, sn ) + if( wantq )call stdlib_drot( n, q( 1, j3 ), 1, q( 1, j4 ), 1, cs, sn ) + end if + end if + return + ! exit with info = 1 if swap was rejected. + 50 continue + info = 1 + return + end subroutine stdlib_dlaexc + + !> DLAHQR: is an auxiliary routine called by DHSEQR to update the + !> eigenvalues and Schur decomposition already computed by DHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: wi(*), wr(*) + ! ========================================================= + ! Parameters + real(dp), parameter :: dat1 = 3.0_dp/4.0_dp + real(dp), parameter :: dat2 = -0.4375_dp + integer(ilp), parameter :: kexsh = 10 + + + + ! Local Scalars + real(dp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & + rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 + integer(ilp) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl + ! Local Arrays + real(dp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + wr( ilo ) = h( ilo, ilo ) + wi( ilo ) = zero + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = zero + h( j+3, j ) = zero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=dp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1 or 2. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 20 continue + l = ilo + if( i=ilo )tst = tst + abs( h( k-1, k-2 ) ) + if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some cases. ==== + if( abs( h( k, k-1 ) )<=ulp*tst ) then + ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 + end if + end do + 40 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = zero + end if + ! exit from loop if a submatrix of order 1 or 2 has split off. + if( l>=i-1 )go to 150 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + h11 = dat1*s + h( i, i ) + h12 = dat2*s + h21 = s + h22 = h11 + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) + h11 = dat1*s + h( l, l ) + h12 = dat2*s + h21 = s + h22 = h11 + else + ! prepare to use francis' double shift + ! (i.e. 2nd degree generalized rayleigh quotient) + h11 = h( i-1, i-1 ) + h21 = h( i, i-1 ) + h12 = h( i-1, i ) + h22 = h( i, i ) + end if + s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) + if( s==zero ) then + rt1r = zero + rt1i = zero + rt2r = zero + rt2i = zero + else + h11 = h11 / s + h21 = h21 / s + h12 = h12 / s + h22 = h22 / s + tr = ( h11+h22 ) / two + det = ( h11-tr )*( h22-tr ) - h12*h21 + rtdisc = sqrt( abs( det ) ) + if( det>=zero ) then + ! ==== complex conjugate shifts ==== + rt1r = tr*s + rt2r = rt1r + rt1i = rtdisc*s + rt2i = -rt1i + else + ! ==== realshifts (use only one of them,KIND=dp) ==== + rt1r = tr + rtdisc + rt2r = tr - rtdisc + if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then + rt1r = rt1r*s + rt2r = rt1r + else + rt2r = rt2r*s + rt1r = rt2r + end if + rt1i = zero + rt2i = zero + end if + end if + ! look for two consecutive small subdiagonal elements. + do m = i - 2, l, -1 + ! determine the effect of starting the double-shift qr + ! iteration at row m, and see if this would make h(m,m-1) + ! negligible. (the following uses scaling to avoid + ! overflows and most underflows.) + h21s = h( m+1, m ) + s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) + h21s = h( m+1, m ) / s + v( 1 ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & + rt1i*( rt2i / s ) + v( 2 ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) + v( 3 ) = h21s*h( m+2, m+1 ) + s = abs( v( 1 ) ) + abs( v( 2 ) ) + abs( v( 3 ) ) + v( 1 ) = v( 1 ) / s + v( 2 ) = v( 2 ) / s + v( 3 ) = v( 3 ) / s + if( m==l )go to 60 + if( abs( h( m, m-1 ) )*( abs( v( 2 ) )+abs( v( 3 ) ) )<=ulp*abs( v( 1 ) )*( abs( & + h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 + end do + 60 continue + ! double-shift qr step + loop_130: do k = m, i - 1 + ! the first iteration of this loop determines a reflection g + ! from the vector v and applies it from left and right to h, + ! thus creating a nonzero bulge below the subdiagonal. + ! each subsequent iteration determines a reflection g to + ! restore the hessenberg form in the (k-1)th column, and thus + ! chases the bulge one step toward the bottom of the active + ! submatrix. nr is the order of g. + nr = min( 3, i-k+1 ) + if( k>m )call stdlib_dcopy( nr, h( k, k-1 ), 1, v, 1 ) + call stdlib_dlarfg( nr, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = zero + if( kl ) then + ! ==== use the following instead of + ! . h( k, k-1 ) = -h( k, k-1 ) to + ! . avoid a bug when v(2) and v(3) + ! . underflow. ==== + h( k, k-1 ) = h( k, k-1 )*( one-t1 ) + end if + v2 = v( 2 ) + t2 = t1*v2 + if( nr==3 ) then + v3 = v( 3 ) + t3 = t1*v3 + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + h( k+2, j ) = h( k+2, j ) - sum*t3 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, min( k+3, i ) + sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + h( j, k+2 ) = h( j, k+2 ) - sum*t3 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + z( j, k+2 ) = z( j, k+2 ) - sum*t3 + end do + end if + else if( nr==2 ) then + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, i + sum = h( j, k ) + v2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + end do + end if + end if + end do loop_130 + end do loop_140 + ! failure to converge in remaining number of iterations + info = i + return + 150 continue + if( l==i ) then + ! h(i,i-1) is negligible: one eigenvalue has converged. + wr( i ) = h( i, i ) + wi( i ) = zero + else if( l==i-1 ) then + ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. + ! transform the 2-by-2 submatrix to standard schur form, + ! and compute and store the eigenvalues. + call stdlib_dlanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & + wi( i-1 ), wr( i ), wi( i ),cs, sn ) + if( wantt ) then + ! apply the transformation to the rest of h. + if( i2>i )call stdlib_drot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) + + call stdlib_drot( i-i1-1, h( i1, i-1 ), 1, h( i1, i ), 1, cs, sn ) + end if + if( wantz ) then + ! apply the transformation to z. + call stdlib_drot( nz, z( iloz, i-1 ), 1, z( iloz, i ), 1, cs, sn ) + end if + end if + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 20 + 160 continue + return + end subroutine stdlib_dlahqr + + !> DLASD2: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> singular values are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + !> DLASD2 is called from DLASD1. + + pure subroutine stdlib_dlasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & + u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + real(dp), intent(in) :: alpha, beta + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) + integer(ilp), intent(inout) :: idxq(*) + real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(dp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) + ! ===================================================================== + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(dp) :: c, eps, hlftol, s, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + if( ldun )go to 110 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + coltyp( j ) = 4 + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_dlapy2( c, s ) + c = c / tau + s = -s / tau + z( j ) = tau + z( jprev ) = zero + ! apply back the givens rotation to the left and right + ! singular vector matrices. + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_drot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s ) + call stdlib_drot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,s ) + if( coltyp( j )/=coltyp( jprev ) ) then + coltyp( j ) = 3 + end if + coltyp( jprev ) = 4 + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 100 + 110 continue + ! record the last singular value. + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 120 continue + ! count up the total number of the various types of columns, then + ! form a permutation which positions the four column types into + ! four groups of uniform structure (although one or more of these + ! groups may be empty). + do j = 1, 4 + ctot( j ) = 0 + end do + do j = 2, n + ct = coltyp( j ) + ctot( ct ) = ctot( ct ) + 1 + end do + ! psm(*) = position in submatrix (of types 1 through 4) + psm( 1 ) = 2 + psm( 2 ) = 2 + ctot( 1 ) + psm( 3 ) = psm( 2 ) + ctot( 2 ) + psm( 4 ) = psm( 3 ) + ctot( 3 ) + ! fill out the idxc array so that the permutation which it induces + ! will place all type-1 columns first, all type-2 columns next, + ! then all type-3's, and finally all type-4's, starting from the + ! second column. this applies similarly to the rows of vt. + do j = 2, n + jp = idxp( j ) + ct = coltyp( jp ) + idxc( psm( ct ) ) = j + psm( ct ) = psm( ct ) + 1 + end do + ! sort the singular values and corresponding singular vectors into + ! dsigma, u2, and vt2 respectively. the singular values/vectors + ! which were not deflated go into the first k slots of dsigma, u2, + ! and vt2 respectively, while those which were deflated go into the + ! last n - k slots, except that the first column/row will be treated + ! separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + idxj = idxq( idx( idxp( idxc( j ) ) )+1 ) + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_dcopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 ) + call stdlib_dcopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 ) + end do + ! determine dsigma(1), dsigma(2) and z(1) + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_dlapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = z( m ) / z( 1 ) + end if + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! move the rest of the updating row to z. + call stdlib_dcopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 ) + ! determine the first column of u2, the first row of vt2 and the + ! last row of vt. + call stdlib_dlaset( 'A', n, 1, zero, zero, u2, ldu2 ) + u2( nlp1, 1 ) = one + if( m>n ) then + do i = 1, nlp1 + vt( m, i ) = -s*vt( nlp1, i ) + vt2( 1, i ) = c*vt( nlp1, i ) + end do + do i = nlp2, m + vt2( 1, i ) = s*vt( m, i ) + vt( m, i ) = c*vt( m, i ) + end do + else + call stdlib_dcopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 ) + end if + if( m>n ) then + call stdlib_dcopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 ) + end if + ! the deflated singular values and their corresponding vectors go + ! into the back of d, u, and v respectively. + if( n>k ) then + call stdlib_dcopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + call stdlib_dlacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),ldu ) + call stdlib_dlacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),ldvt ) + end if + ! copy ctot into coltyp for referencing in stdlib_dlasd3. + do j = 1, 4 + coltyp( j ) = ctot( j ) + end do + return + end subroutine stdlib_dlasd2 + + !> DLASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a real M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + + pure subroutine stdlib_dlaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, lwork, ldt + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. nm .and. m>0 )) then + info = -3 + else if( nb<0 ) then + info = -4 + else if( lda=n).or.(nb<=m).or.(nb>=n)) then + call stdlib_dgelqt( m, n, mb, a, lda, t, ldt, work, info) + return + end if + kk = mod((n-m),(nb-m)) + ii=n-kk+1 + ! compute the lq factorization of the first block a(1:m,1:nb) + call stdlib_dgelqt( m, nb, mb, a(1,1), lda, t, ldt, work, info) + ctr = 1 + do i = nb+1, ii-nb+m , (nb-m) + ! compute the qr factorization of the current block a(1:m,i:i+nb-m) + call stdlib_dtplqt( m, nb-m, 0, mb, a(1,1), lda, a( 1, i ),lda, t(1, ctr * m + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(1:m,ii:n) + if (ii<=n) then + call stdlib_dtplqt( m, kk, 0, mb, a(1,1), lda, a( 1, ii ),lda, t(1, ctr * m + 1), & + ldt,work, info ) + end if + work( 1 ) = m * mb + return + end subroutine stdlib_dlaswlq + + !> DLATSQR: computes a blocked Tall-Skinny QR factorization of + !> a real M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + + pure subroutine stdlib_dlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, ldt, lwork + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. mn .and. n>0 )) then + info = -4 + else if( lda=m)) then + call stdlib_dgeqrt( m, n, nb, a, lda, t, ldt, work, info) + return + end if + kk = mod((m-n),(mb-n)) + ii=m-kk+1 + ! compute the qr factorization of the first block a(1:mb,1:n) + call stdlib_dgeqrt( mb, n, nb, a(1,1), lda, t, ldt, work, info ) + ctr = 1 + do i = mb+1, ii-mb+n , (mb-n) + ! compute the qr factorization of the current block a(i:i+mb-n,1:n) + call stdlib_dtpqrt( mb-n, n, 0, nb, a(1,1), lda, a( i, 1 ), lda,t(1, ctr * n + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(ii:m,1:n) + if (ii<=m) then + call stdlib_dtpqrt( kk, n, 0, nb, a(1,1), lda, a( ii, 1 ), lda,t(1, ctr * n + 1), & + ldt,work, info ) + end if + work( 1 ) = n*nb + return + end subroutine stdlib_dlatsqr + + !> DORGBR: generates one of the real orthogonal matrices Q or P**T + !> determined by DGEBRD when reducing a real matrix A to bidiagonal + !> form: A = Q * B * P**T. Q and P**T are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + !> is of order N: + !> if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m + !> rows of P**T, where n >= m >= k; + !> if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as + !> an N-by-N matrix. + + pure subroutine stdlib_dorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantq + integer(ilp) :: i, iinfo, j, lwkopt, mn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + wantq = stdlib_lsame( vect, 'Q' ) + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 .or. ( wantq .and. ( n>m .or. nn .or. m=k ) then + call stdlib_dorgqr( m, n, k, a, lda, tau, work, -1, iinfo ) + else + if( m>1 ) then + call stdlib_dorgqr( m-1, m-1, m-1, a, lda, tau, work, -1,iinfo ) + end if + end if + else + if( k1 ) then + call stdlib_dorglq( n-1, n-1, n-1, a, lda, tau, work, -1,iinfo ) + end if + end if + end if + lwkopt = work( 1 ) + lwkopt = max (lwkopt, mn) + end if + if( info/=0 ) then + call stdlib_xerbla( 'DORGBR', -info ) + return + else if( lquery ) then + work( 1 ) = lwkopt + return + end if + ! quick return if possible + if( m==0 .or. n==0 ) then + work( 1 ) = 1 + return + end if + if( wantq ) then + ! form q, determined by a call to stdlib_dgebrd to reduce an m-by-k + ! matrix + if( m>=k ) then + ! if m >= k, assume m >= n >= k + call stdlib_dorgqr( m, n, k, a, lda, tau, work, lwork, iinfo ) + else + ! if m < k, assume m = n + ! shift the vectors which define the elementary reflectors one + ! column to the right, and set the first row and column of q + ! to those of the unit matrix + do j = m, 2, -1 + a( 1, j ) = zero + do i = j + 1, m + a( i, j ) = a( i, j-1 ) + end do + end do + a( 1, 1 ) = one + do i = 2, m + a( i, 1 ) = zero + end do + if( m>1 ) then + ! form q(2:m,2:m) + call stdlib_dorgqr( m-1, m-1, m-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + else + ! form p**t, determined by a call to stdlib_dgebrd to reduce a k-by-n + ! matrix + if( k= n, assume m = n + ! shift the vectors which define the elementary reflectors one + ! row downward, and set the first row and column of p**t to + ! those of the unit matrix + a( 1, 1 ) = one + do i = 2, n + a( i, 1 ) = zero + end do + do j = 2, n + do i = j - 1, 2, -1 + a( i, j ) = a( i-1, j ) + end do + a( 1, j ) = zero + end do + if( n>1 ) then + ! form p**t(2:n,2:n) + call stdlib_dorglq( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dorgbr + + !> If VECT = 'Q', DORMBR: overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'T': P**T * C C * P**T + !> Here Q and P**T are the orthogonal matrices determined by DGEBRD when + !> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + !> P**T are defined as products of elementary reflectors H(i) and G(i) + !> respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the orthogonal matrix Q or P**T that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + + pure subroutine stdlib_dormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), c(ldc,*) + real(dp), intent(in) :: tau(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: applyq, left, lquery, notran + character :: transt + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + applyq = stdlib_lsame( vect, 'Q' ) + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q or p and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( k<0 ) then + info = -6 + else if( ( applyq .and. lda=k ) then + ! q was determined by a call to stdlib_dgebrd with nq >= k + call stdlib_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & + ) + else if( nq>1 ) then + ! q was determined by a call to stdlib_dgebrd with nq < k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_dormqr( side, trans, mi, ni, nq-1, a( 2, 1 ), lda, tau,c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + else + ! apply p + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + if( nq>k ) then + ! p was determined by a call to stdlib_dgebrd with nq > k + call stdlib_dormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & + iinfo ) + else if( nq>1 ) then + ! p was determined by a call to stdlib_dgebrd with nq <= k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_dormlq( side, transt, mi, ni, nq-1, a( 1, 2 ), lda,tau, c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_dormbr + + !> DPBSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_dpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab DPBSVX: uses the Cholesky factorization A = U**T*U or A = L*L**T to + !> compute the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_dpbsvx( fact, uplo, n, kd, nrhs, ab, ldab, afb, ldafb,equed, s, b, ldb, x, & + ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*), s(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ, upper + integer(ilp) :: i, infequ, j, j1, j2 + real(dp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + upper = stdlib_lsame( uplo, 'U' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_dlansb( '1', uplo, n, kd, ab, ldab, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_dpbcon( uplo, n, kd, afb, ldafb, anorm, rcond, work, iwork,info ) + ! compute the solution matrix x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dpbtrs( uplo, n, kd, nrhs, afb, ldafb, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_dpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DPFTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dpftrf( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DPFTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_dpotrf( 'L', n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_dtrsm( 'R', 'L', 'T', 'N', n2, n1, one, a( 0 ), n,a( n1 ), n ) + + call stdlib_dsyrk( 'U', 'N', n2, n1, -one, a( n1 ), n, one,a( n ), n ) + call stdlib_dpotrf( 'U', n2, a( n ), n, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_dpotrf( 'L', n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_dtrsm( 'L', 'L', 'N', 'N', n1, n2, one, a( n2 ), n,a( 0 ), n ) + + call stdlib_dsyrk( 'U', 'T', n2, n1, -one, a( 0 ), n, one,a( n1 ), n ) + call stdlib_dpotrf( 'U', n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + call stdlib_dpotrf( 'U', n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_dtrsm( 'L', 'U', 'T', 'N', n1, n2, one, a( 0 ), n1,a( n1*n1 ), n1 & + ) + call stdlib_dsyrk( 'L', 'T', n2, n1, -one, a( n1*n1 ), n1, one,a( 1 ), n1 ) + + call stdlib_dpotrf( 'L', n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + call stdlib_dpotrf( 'U', n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_dtrsm( 'R', 'U', 'N', 'N', n2, n1, one, a( n2*n2 ),n2, a( 0 ), n2 & + ) + call stdlib_dsyrk( 'L', 'N', n2, n1, -one, a( 0 ), n2, one,a( n1*n2 ), n2 ) + + call stdlib_dpotrf( 'L', n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_dpotrf( 'L', k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_dtrsm( 'R', 'L', 'T', 'N', k, k, one, a( 1 ), n+1,a( k+1 ), n+1 ) + + call stdlib_dsyrk( 'U', 'N', k, k, -one, a( k+1 ), n+1, one,a( 0 ), n+1 ) + + call stdlib_dpotrf( 'U', k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_dpotrf( 'L', k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_dtrsm( 'L', 'L', 'N', 'N', k, k, one, a( k+1 ),n+1, a( 0 ), n+1 ) + + call stdlib_dsyrk( 'U', 'T', k, k, -one, a( 0 ), n+1, one,a( k ), n+1 ) + + call stdlib_dpotrf( 'U', k, a( k ), n+1, info ) + if( info>0 )info = info + k + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_dpotrf( 'U', k, a( 0+k ), k, info ) + if( info>0 )return + call stdlib_dtrsm( 'L', 'U', 'T', 'N', k, k, one, a( k ), n1,a( k*( k+1 ) ), & + k ) + call stdlib_dsyrk( 'L', 'T', k, k, -one, a( k*( k+1 ) ), k, one,a( 0 ), k ) + + call stdlib_dpotrf( 'L', k, a( 0 ), k, info ) + if( info>0 )info = info + k + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_dpotrf( 'U', k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_dtrsm( 'R', 'U', 'N', 'N', k, k, one,a( k*( k+1 ) ), k, a( 0 ), k & + ) + call stdlib_dsyrk( 'L', 'N', k, k, -one, a( 0 ), k, one,a( k*k ), k ) + call stdlib_dpotrf( 'L', k, a( k*k ), k, info ) + if( info>0 )info = info + k + end if + end if + end if + return + end subroutine stdlib_dpftrf + + !> DPOSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**T* U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_dposv( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DPOSVX: uses the Cholesky factorization A = U**T*U or A = L*L**T to + !> compute the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_dposvx( fact, uplo, n, nrhs, a, lda, af, ldaf, equed,s, b, ldb, x, ldx, & + rcond, ferr, berr, work,iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*), s(*) + real(dp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(dp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_dlansy( '1', uplo, n, a, lda, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_dpocon( uplo, n, af, ldaf, anorm, rcond, work, iwork, info ) + ! compute the solution matrix x. + call stdlib_dlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_dpotrs( uplo, n, nrhs, af, ldaf, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_dporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x, ldx,ferr, berr, work, & + iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DTREXC: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + !> moved to row ILST. + !> The real Schur form T is reordered by an orthogonal similarity + !> transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + !> is updated by postmultiplying it with Z. + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_dtrexc( compq, n, t, ldt, q, ldq, ifst, ilst, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldt, n + ! Array Arguments + real(dp), intent(inout) :: q(ldq,*), t(ldt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: here, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test the input arguments. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.wantq .and. .not.stdlib_lsame( compq, 'N' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + ! determine the first row of specified block + ! and find out it is 1 by 1 or 2 by 2. + if( ifst>1 ) then + if( t( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( t( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,nbf, work, & + info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2 by 2 block breaks into two 1 by 1 blocks + if( nbf==2 ) then + if( t( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1 by 1 blocks each of which + ! must be swapped individually + nbnext = 1 + if( here>=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,1, work, info ) + + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1 by 1 blocks, no problems possible + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here, nbnext, 1,work, info ) + + here = here - 1 + else + ! recompute nbnext in case 2 by 2 split + if( t( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2 by 2 block did not split + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here-1, 2, 1,work, info ) + + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2 by 2 block did split + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here, 1, 1,work, info ) + + call stdlib_dlaexc( wantq, n, t, ldt, q, ldq, here-1, 1, 1,work, info ) + + here = here - 2 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + return + end subroutine stdlib_dtrexc + + !> DTRSEN: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in + !> the leading diagonal blocks of the upper quasi-triangular matrix T, + !> and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_dtrsen( job, compq, select, n, t, ldt, q, ldq, wr, wi,m, s, sep, work, & + lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, liwork, lwork, n + real(dp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: q(ldq,*), t(ldt,*) + real(dp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, kk, ks, liwmin, lwmin, n1, n2, nn + real(dp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt DTRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a real upper + !> quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q + !> orthogonal). + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_dtrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm, m, & + work, ldwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: s(*), sep(*), work(ldwork,*) + real(dp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: pair, somcon, wantbh, wants, wantsp + integer(ilp) :: i, ierr, ifst, ilst, j, k, kase, ks, n2, nn + real(dp) :: bignum, cond, cs, delta, dumm, eps, est, lnrm, mu, prod, prod1, prod2, & + rnrm, scale, smlnum, sn + ! Local Arrays + integer(ilp) :: isave(3) + real(dp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt DGEJSV: computes the singular value decomposition (SVD) of a real M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^t, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and + !> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + !> DGEJSV can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_dgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,work, lwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + ! Local Scalars + real(dp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & + l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + ! Intrinsic Functions + intrinsic :: abs,log,max,min,real,idnint,sign,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& + then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & + .or. ( jracc .and. (.not.lsvec) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7,4*n+1,2*m+n))) .or.(& + .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7,4*n+n*n,2*m+n))) .or.(lsvec & + .and. (.not.rsvec) .and. (lwork < max(7,2*m+n,4*n+1))).or.(rsvec .and. (.not.lsvec) & + .and. (lwork < max(7,2*m+n,4*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& + lwork big ) then + info = - 9 + call stdlib_xerbla( 'DGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_dscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_dlaset( 'G', m, n1, zero, one, u, ldu ) + if ( rsvec ) call stdlib_dlaset( 'G', n, n, zero, one, v, ldv ) + work(1) = one + work(2) = one + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_dlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_dlacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_dgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) + call stdlib_dorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr ) + call stdlib_dcopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = one + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + work(1) = one / scalem + work(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + return + end if + transp = .false. + l2tran = l2tran .and. ( m == n ) + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^t and a^t * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_dlassq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_dlassq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + work(m+n+p) = xsc * scalem + work(n+p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, work(n+p) ) + if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) + end do + else + do p = 1, m + work(m+n+p) = scalem*abs( a(p,stdlib_idamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, work(m+n+p) ) + aatmin = min( aatmin, work(m+n+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^t would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_dlassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=dp)) + ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^t * a. do the same with the + ! diagonal of a * a^t, compute the entropy of the corresponding + ! probability distribution. note that a * a^t and a^t * a have the + ! same trace. + entrat = zero + do p = n+1, n+m + big1 = ( ( work(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=dp)) + ! analyze the entropies and decide a or a^t. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^t is better than a, transpose a. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + do q = p + 1, n + temp1 = a(q,p) + a(q,p) = a(p,q) + a(p,q) = temp1 + end do + end do + do p = 1, n + work(m+n+p) = sva(p) + sva(p) = work(n+p) + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_dgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_dgesvj will compute them. so, in that case, + ! one should use stdlib_dgesvj instead of stdlib_dgejsv. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=dp) ) + call stdlib_dlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_dlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_dgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=dp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_dlacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_dscal( p, one/temp1, v(1,p), 1 ) + end do + call stdlib_dpocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_dlacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_dscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_dpocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else + call stdlib_dlacpy( 'U', n, n, a, lda, work(n+1), n ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_dscal( p, one/temp1, work(n+(p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + call stdlib_dpocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2*n+& + m+1), ierr ) + end if + sconda = one / sqrt(temp1) + ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_dcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=dp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_dlaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_dgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_dcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=dp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_dlaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_dgesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & + lwork, info ) + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_dcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + call stdlib_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_dgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) + + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_dlaset( 'LOWER', nr-1, nr-1, zero, zero, a(2,1), lda ) + call stdlib_dgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) + call stdlib_dlacpy( 'LOWER', nr, nr, a, lda, v, ldv ) + call stdlib_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_dgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_dcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + end do + call stdlib_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_dgesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & + lwork, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < n ) then + call stdlib_dlaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv ) + call stdlib_dlaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv ) + call stdlib_dlaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) + end if + call stdlib_dormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & + lwork-n, ierr ) + end if + do p = 1, n + call stdlib_dcopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + end do + call stdlib_dlacpy( 'ALL', n, n, a, lda, v, ldv ) + if ( transp ) then + call stdlib_dlacpy( 'ALL', n, n, v, ldv, u, ldu ) + end if + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_dcopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + end do + call stdlib_dlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_dgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),lwork-2*n, ierr ) + do p = 1, nr - 1 + call stdlib_dcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + end do + call stdlib_dlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_dgesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & + lwork-n, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < m ) then + call stdlib_dlaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_dlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu ) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + call stdlib_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_dnrm2( m, u(1,p), 1 ) + call stdlib_dscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_dlacpy( 'ALL', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_dgejsv. + do p = 1, nr + call stdlib_dcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + sign( temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_dlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_dlacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_dnrm2(nr-p+1,work(2*n+(p-1)*nr+p),1) + call stdlib_dscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1) + end do + call stdlib_dpocon('LOWER',nr,work(2*n+1),nr,one,temp1,work(2*n+nr*nr+1),iwork(m+& + 2*n+1),ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=dp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=dp)) + cond_ok = sqrt(real(nr,KIND=dp)) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^t = q2 * r2 + call stdlib_dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + if ( nr /= n )call stdlib_dlacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_dcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + end do + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_dgeqp3 + ! should be replaced with eg. call sgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^t * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_dgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2*n+1), lwork-& + 2*n, ierr ) + ! * call stdlib_dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + call stdlib_dlacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + v(p,q) = - sign( temp1, v(q,p) ) + end do + end do + else + call stdlib_dlaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_dgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),work(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_dlacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_dnrm2( p, work(2*n+n*nr+nr+p), nr ) + call stdlib_dscal( p, one/temp1, work(2*n+n*nr+nr+p), nr ) + end do + call stdlib_dpocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,work(2*n+n*nr+nr+& + nr*nr+1),iwork(m+2*n+1),ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_dlacpy( 'U', nr, nr, v, ldv, work(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + temp1 = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - sign( temp1, v(q,p) ) + v(p,q) = - sign( temp1, v(p,q) ) + end do + end do + else + call stdlib_dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_dgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2*n+n*nr+nr+1),& + lwork-2*n-n*nr-nr,info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_dcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_dscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_dgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_dtrsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) + else + ! .. r1 is well conditioned, but non-square. transpose(r2) + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_dgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_dtrsm('L','U','T','N',nr,nr,one,work(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + end if + call stdlib_dormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! :) .. the input matrix a is very likely a relative of + ! the kahan matrix :) + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^t*v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_dgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_dcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_dscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_dtrsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_dgejsv completes the task. + ! compute the full svd of l3 using stdlib_dgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_dgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_dormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,work(2*n+n*nr+1), u, & + ldu, work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=dp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_dnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_dscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_dlaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_dormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=dp)) * epsln + do p = 1, nr + xsc = one / stdlib_dnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_dscal( m, xsc, & + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_dlacpy( 'UPPER', n, n, a, lda, work(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + temp1 = xsc * work( n + (p-1)*n + p ) + do q = 1, p - 1 + work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) + end do + end do + else + call stdlib_dlaset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) + end if + call stdlib_dgesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& + n*n+1), lwork-n-n*n, info ) + scalem = work(n+n*n+1) + numrank = nint(work(n+n*n+2),KIND=ilp) + do p = 1, n + call stdlib_dcopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_dscal( n, sva(p), work(n+(p-1)*n+1), 1 ) + end do + call stdlib_dtrsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& + 1), n ) + do p = 1, n + call stdlib_dcopy( n, work(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=dp))*epsln + do p = 1, n + xsc = one / stdlib_dnrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_dscal( n, xsc, & + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_dlaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_dlaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu ) + call stdlib_dlaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) + end if + end if + call stdlib_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + temp1 = sqrt(real(m,KIND=dp))*epsln + do p = 1, n1 + xsc = one / stdlib_dnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_dscal( m, xsc, & + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values. since that is not always the case, ... + do p = 1, nr + call stdlib_dcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& + temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_dlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + call stdlib_dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + call stdlib_dlacpy( 'L', n, nr, v, ldv, work(2*n+1), n ) + do p = 1, nr + call stdlib_dcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) + u(p,q) = - sign( temp1, u(q,p) ) + end do + end do + else + call stdlib_dlaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu ) + end if + call stdlib_dgesvj( 'G', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2*n+n*nr+1), & + lwork-2*n-n*nr, info ) + scalem = work(2*n+n*nr+1) + numrank = nint(work(2*n+n*nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=dp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_dnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_dscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_dlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu ) + call stdlib_dlaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^t + do p = 1, n + call stdlib_dswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_dlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + work(1) = uscal2 * scalem + work(2) = uscal1 + if ( errest ) work(3) = sconda + if ( lsvec .and. rsvec ) then + work(4) = condr1 + work(5) = condr2 + end if + if ( l2tran ) then + work(6) = entra + work(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + return + end subroutine stdlib_dgejsv + + !> DGELQ: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_dgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'DGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'DGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if ( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_dgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_dlaswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_dgelq + + !> DGELSY: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by orthogonal transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**T [ inv(T11)*Q1**T*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + + subroutine stdlib_dgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & + nb3, nb4 + real(dp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & + wsize + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_dlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_dgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, info ) + + wsize = mn + work( mn+1 ) + ! workspace: mn+2*n+nb*(n+1). + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = one + work( ismax ) = one + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank DGEQR: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_dgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'DGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'DGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_dgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_dlatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_dgeqr + + !> DGETSLS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_dgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(dp) :: anrm, bignum, bnrm, smlnum, tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'T' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_dgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_dgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_dgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_dgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_dlaset( 'F', maxmn, nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_dlange( 'M', brow, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_dgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_dgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_dtrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_dtrtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_dgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_dgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_dtrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_dgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_dgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_dlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_dlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=dp) + return + end subroutine stdlib_dgetsls + + !> DGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a real M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in DGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of DGEQRT for more details on the format. + + pure subroutine stdlib_dgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,real,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m DLAED2: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_dlaed2( k, n, n1, d, q, ldq, indxq, rho, z, dlamda, w,q2, indx, indxc,& + indxp, coltyp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldq, n, n1 + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), indx(*), indxc(*), indxp(*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(inout) :: d(*), q(ldq,*), z(*) + real(dp), intent(out) :: dlamda(*), q2(*), w(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: mone = -1.0_dp + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, imax, iq1, iq2, j, jmax, js, k2, n1p1, n2, nj, pj + real(dp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( ldqn1 .or. ( n / 2 )n )go to 100 + if( rho*abs( z( nj ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + coltyp( nj ) = 4 + indxp( k2 ) = nj + else + ! check if eigenvalues are close enough to allow deflation. + s = z( pj ) + c = z( nj ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_dlapy2( c, s ) + t = d( nj ) - d( pj ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( nj ) = tau + z( pj ) = zero + if( coltyp( nj )/=coltyp( pj ) )coltyp( nj ) = 2 + coltyp( pj ) = 4 + call stdlib_drot( n, q( 1, pj ), 1, q( 1, nj ), 1, c, s ) + t = d( pj )*c**2 + d( nj )*s**2 + d( nj ) = d( pj )*s**2 + d( nj )*c**2 + d( pj ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( pj ) DLAQR2: is identical to DLAQR3 except that it avoids + !> recursion by calling DLAHQR instead of DLAQR4. + !> Aggressive early deflation: + !> This subroutine accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwkopt + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_dgehrd ==== + call stdlib_dgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_dormhr ==== + call stdlib_dormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=dp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_dlaset( 'A', jw, jw, zero, one, v, ldv ) + call stdlib_dlahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, jw, & + v, ldv, infqr ) + ! ==== stdlib_dtrexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not.bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_dtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_dtrexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_dcopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_dlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_dlarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_dlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_dlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_dgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_dcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_dormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=dp) + end subroutine stdlib_dlaqr2 + + !> DLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B, + !> where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + !> A related subroutine DLASD7 handles the case in which the singular + !> values (and the singular vectors in factored form) are desired. + !> DLASD1 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The left singular vectors of the original matrix are stored in U, and + !> the transpose of the right singular vectors are stored in VT, and the + !> singular values are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or when there are zeros in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD2. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the square roots of the + !> roots of the secular equation via the routine DLASD4 (as called + !> by DLASD3). This routine also calculates the singular vectors of + !> the current problem. + !> The final stage consists of computing the updated singular vectors + !> directly using the updated singular values. The singular vectors + !> for the current problem are multiplied with the singular vectors + !> from the overall problem. + + pure subroutine stdlib_dlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, nl, nr, sqre + real(dp), intent(inout) :: alpha, beta + ! Array Arguments + integer(ilp), intent(inout) :: idxq(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & + ldvt2, m, n, n1, n2 + real(dp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLASD1', -info ) + return + end if + n = nl + nr + 1 + m = n + sqre + ! the following values are for bookkeeping purposes only. they are + ! integer pointers which indicate the portion of the workspace + ! used by a particular array in stdlib_dlasd2 and stdlib_dlasd3. + ldu2 = n + ldvt2 = m + iz = 1 + isigma = iz + m + iu2 = isigma + n + ivt2 = iu2 + ldu2*n + iq = ivt2 + ldvt2*m + idx = 1 + idxc = idx + n + coltyp = idxc + n + idxp = coltyp + n + ! scale. + orgnrm = max( abs( alpha ), abs( beta ) ) + d( nl+1 ) = zero + do i = 1, n + if( abs( d( i ) )>orgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! deflate singular values. + call stdlib_dlasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& + isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & + idxc ), idxq, iwork( coltyp ), info ) + ! solve secular equation and update singular vectors. + ldq = k + call stdlib_dlasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & + iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& + info ) + ! report the convergence failure. + if( info/=0 ) then + return + end if + ! unscale. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_dlamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_dlasd1 + + !> DLAED1: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + !> the case in which eigenvalues only or eigenvalues and eigenvectors + !> of a full symmetric matrix (which was reduced to tridiagonal form) + !> are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**T*u, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by DLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_dlaed1( n, d, q, ldq, indxq, rho, cutpnt, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: indxq(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), q(ldq,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, n1, n2, & + zpp1 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ldqcutpnt .or. ( n / 2 ) DLAED0: computes all eigenvalues and corresponding eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + + pure subroutine stdlib_dlaed0( icompq, qsiz, n, d, e, q, ldq, qstore, ldqs,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldq, ldqs, n, qsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*), q(ldq,*) + real(dp), intent(out) :: qstore(ldqs,*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, subpbs, & + tlvls + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>2 ) then + info = -1 + else if( ( icompq==1 ) .and. ( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + if( icompq/=2 ) then + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=dp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + loop_90: do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. + ! stdlib_dlaed1 is used only for the full eigensystem of a tridiagonal + ! matrix. + ! stdlib_dlaed7 handles the cases in which eigenvalues only or eigenvalues + ! and eigenvectors of a full symmetric matrix (which was reduced to + ! tridiagonal form) are desired. + if( icompq==2 ) then + call stdlib_dlaed1( matsiz, d( submat ), q( submat, submat ),ldq, iwork( & + indxq+submat ),e( submat+msd2-1 ), msd2, work,iwork( subpbs+1 ), info ) + + else + call stdlib_dlaed7( icompq, matsiz, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,iwork( indxq+submat ), e( submat+msd2-1 ),msd2, & + work( iq ), iwork( iqptr ),iwork( iprmpt ), iwork( iperm ),iwork( igivpt ), & + iwork( igivcl ),work( igivnm ), work( iwrem ),iwork( subpbs+1 ), info ) + + end if + if( info/=0 )go to 130 + iwork( i / 2+1 ) = iwork( i+2 ) + end do loop_90 + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + if( icompq==1 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_dcopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_dcopy( n, work, 1, d, 1 ) + else if( icompq==2 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_dcopy( n, q( 1, j ), 1, work( n*i+1 ), 1 ) + end do + call stdlib_dcopy( n, work, 1, d, 1 ) + call stdlib_dlacpy( 'A', n, n, work( n+1 ), n, q, ldq ) + else + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + end do + call stdlib_dcopy( n, work, 1, d, 1 ) + end if + go to 140 + 130 continue + info = submat*( n+1 ) + submat + matsiz - 1 + 140 continue + return + end subroutine stdlib_dlaed0 + + !> DSTEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band real symmetric matrix can also be + !> found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLAED3 for details. + + pure subroutine stdlib_dstedc( compz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*), z(ldz,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: finish, i, icompz, ii, j, k, lgn, liwmin, lwmin, m, smlsiz, start, & + storez, strtrw + real(dp) :: eps, orgnrm, p, tiny + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max,mod,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or.( icompz>0 .and. ldztiny ) then + finish = finish + 1 + go to 20 + end if + end if + ! (sub) problem determined. compute its size and solve it. + m = finish - start + 1 + if( m==1 ) then + start = finish + 1 + go to 10 + end if + if( m>smlsiz ) then + ! scale. + orgnrm = stdlib_dlanst( 'M', m, d( start ), e( start ) ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, m, 1, d( start ), m,info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, m-1, 1, e( start ),m-1, info ) + + if( icompz==1 ) then + strtrw = 1 + else + strtrw = start + end if + call stdlib_dlaed0( icompz, n, m, d( start ), e( start ),z( strtrw, start ), & + ldz, work( 1 ), n,work( storez ), iwork, info ) + if( info/=0 ) then + info = ( info / ( m+1 )+start-1 )*( n+1 ) +mod( info, ( m+1 ) ) + start - & + 1 + go to 50 + end if + ! scale back. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, m, 1, d( start ), m,info ) + else + if( icompz==1 ) then + ! since qr won't update a z matrix which is larger than + ! the length of d, we must solve the sub-problem in a + ! workspace and then multiply back into z. + call stdlib_dsteqr( 'I', m, d( start ), e( start ), work, m,work( m*m+1 ), & + info ) + call stdlib_dlacpy( 'A', n, m, z( 1, start ), ldz,work( storez ), n ) + + call stdlib_dgemm( 'N', 'N', n, m, m, one,work( storez ), n, work, m, zero,& + z( 1, start ), ldz ) + else if( icompz==2 ) then + call stdlib_dsteqr( 'I', m, d( start ), e( start ),z( start, start ), ldz, & + work, info ) + else + call stdlib_dsterf( m, d( start ), e( start ), info ) + end if + if( info/=0 ) then + info = start*( n+1 ) + finish + go to 50 + end if + end if + start = finish + 1 + go to 10 + end if + ! endwhile + if( icompz==0 ) then + ! use quick sort + call stdlib_dlasrt( 'I', n, d, info ) + else + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

DSTEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_dstevd( jobz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iscale, liwmin, lwmin + real(dp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tnrm + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + liwmin = 1 + lwmin = 1 + if( n>1 .and. wantz ) then + lwmin = 1 + 4*n + n**2 + liwmin = 3 + 5*n + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_dscal( n, sigma, d, 1 ) + call stdlib_dscal( n-1, sigma, e( 1 ), 1 ) + end if + ! for eigenvalues only, call stdlib_dsterf. for eigenvalues and + ! eigenvectors, call stdlib_dstedc. + if( .not.wantz ) then + call stdlib_dsterf( n, d, e, info ) + else + call stdlib_dstedc( 'I', n, d, e, z, ldz, work, lwork, iwork, liwork,info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_dscal( n, one / sigma, d, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_dstevd + + !> DSYEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> Because of large use of BLAS of level 3, DSYEVD needs N**2 more + !> workspace than DSYEVX. + + subroutine stdlib_dsyevd( jobz, uplo, n, a, lda, w, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, inde, indtau, indwk2, indwrk, iscale, liopt, liwmin, llwork, & + llwrk2, lopt, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_dlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_dsytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + indwk2 = indwrk + n*n + llwrk2 = lwork - indwk2 + 1 + call stdlib_dsytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_dstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_dormtr to multiply it by the + ! householder transformations stored in a. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + call stdlib_dstedc( 'I', n, w, work( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, iwork, liwork, info ) + call stdlib_dormtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_dlacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_dscal( n, one / sigma, w, 1 ) + work( 1 ) = lopt + iwork( 1 ) = liopt + return + end subroutine stdlib_dsyevd + + !> DSYGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dsygvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lwmin + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 6*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + 1 + end if + lopt = lwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda DSBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. If eigenvectors are desired, it uses + !> a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dsbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, iwork, liwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ab(ldab,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, inde, indwk2, indwrk, iscale, liwmin, llwrk2, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else + if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 5*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + end if + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_dlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_dlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_dsbtrd to reduce symmetric band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + indwk2 = indwrk + n*n + llwrk2 = lwork - indwk2 + 1 + call stdlib_dsbtrd( jobz, uplo, n, kd, ab, ldab, w, work( inde ), z, ldz,work( indwrk )& + , iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, call stdlib_sstedc. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + call stdlib_dstedc( 'I', n, w, work( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, iwork, liwork, info ) + call stdlib_dgemm( 'N', 'N', n, n, n, one, z, ldz, work( indwrk ), n,zero, work( & + indwk2 ), n ) + call stdlib_dlacpy( 'A', n, n, work( indwk2 ), n, z, ldz ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_dscal( n, one / sigma, w, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_dsbevd + + !> DSBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of the + !> form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and + !> banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_dsbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwk2, indwrk, liwmin, llwrk2, lwmin + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 5*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab DSPEVD: computes all the eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dspevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,iwork, liwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ap(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iinfo, inde, indtau, indwrk, iscale, liwmin, llwork, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_dscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_dsptrd to reduce symmetric packed matrix to tridiagonal form. + inde = 1 + indtau = inde + n + call stdlib_dsptrd( uplo, n, ap, w, work( inde ), work( indtau ), iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_dstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_dopmtr to multiply it by the + ! householder transformations represented in ap. + if( .not.wantz ) then + call stdlib_dsterf( n, w, work( inde ), info ) + else + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_dstedc( 'I', n, w, work( inde ), z, ldz, work( indwrk ),llwork, iwork, & + liwork, info ) + call stdlib_dopmtr( 'L', uplo, 'N', n, n, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_dscal( n, one / sigma, w, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_dspevd + + !> DSPGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dspgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: ap(*), bp(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: j, liwmin, lwmin, neig + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, neig + call stdlib_dtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t *y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_dtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_dspgvd + + !> DBDSDC: computes the singular value decomposition (SVD) of a real + !> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + !> using a divide and conquer method, where S is a diagonal matrix + !> with non-negative diagonal elements (the singular values of B), and + !> U and VT are orthogonal matrices of left and right singular vectors, + !> respectively. DBDSDC can be used to compute all singular values, + !> and optionally, singular vectors or singular vectors in compact form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLASD3 for details. + !> The code currently calls DLASDQ if singular values only are desired. + !> However, it can be slightly modified to compute singular values + !> using the divide and conquer method. + + pure subroutine stdlib_dbdsdc( uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n + ! Array Arguments + integer(ilp), intent(out) :: iq(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: q(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! changed dimension statement in comment describing e from (n) to + ! (n-1). sven, 17 feb 05. + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: difl, difr, givcol, givnum, givptr, i, ic, icompq, ierr, ii, is, iu, & + iuplo, ivt, j, k, kk, mlvl, nm1, nsize, perm, poles, qstart, smlsiz, smlszp, sqre, & + start, wstart, z + real(dp) :: cs, eps, orgnrm, p, r, sn + ! Intrinsic Functions + intrinsic :: abs,real,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( stdlib_lsame( compq, 'N' ) ) then + icompq = 0 + else if( stdlib_lsame( compq, 'P' ) ) then + icompq = 1 + else if( stdlib_lsame( compq, 'I' ) ) then + icompq = 2 + else + icompq = -1 + end if + if( iuplo==0 ) then + info = -1 + else if( icompq<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( ldu<1 ) .or. ( ( icompq==2 ) .and. ( ldu=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - start + 1 + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n). solve this 1-by-1 problem + ! first. + nsize = i - start + 1 + if( icompq==2 ) then + u( n, n ) = sign( one, d( n ) ) + vt( n, n ) = one + else if( icompq==1 ) then + q( n+( qstart-1 )*n ) = sign( one, d( n ) ) + q( n+( smlsiz+qstart-1 )*n ) = one + end if + d( n ) = abs( d( n ) ) + end if + if( icompq==2 ) then + call stdlib_dlasd0( nsize, sqre, d( start ), e( start ),u( start, start ), & + ldu, vt( start, start ),ldvt, smlsiz, iwork, work( wstart ), info ) + else + call stdlib_dlasda( icompq, smlsiz, nsize, sqre, d( start ),e( start ), q( & + start+( iu+qstart-2 )*n ), n,q( start+( ivt+qstart-2 )*n ),iq( start+k*n ), q(& + start+( difl+qstart-2 )*n ), q( start+( difr+qstart-2 )*n ),q( start+( z+& + qstart-2 )*n ),q( start+( poles+qstart-2 )*n ),iq( start+givptr*n ), iq( & + start+givcol*n ),n, iq( start+perm*n ),q( start+( givnum+qstart-2 )*n ),q( & + start+( ic+qstart-2 )*n ),q( start+( is+qstart-2 )*n ),work( wstart ), iwork,& + info ) + end if + if( info/=0 ) then + return + end if + start = i + 1 + end if + end do loop_30 + ! unscale + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, ierr ) + 40 continue + ! use selection sort to minimize swaps of singular vectors + do ii = 2, n + i = ii - 1 + kk = i + p = d( i ) + do j = ii, n + if( d( j )>p ) then + kk = j + p = d( j ) + end if + end do + if( kk/=i ) then + d( kk ) = d( i ) + d( i ) = p + if( icompq==1 ) then + iq( i ) = kk + else if( icompq==2 ) then + call stdlib_dswap( n, u( 1, i ), 1, u( 1, kk ), 1 ) + call stdlib_dswap( n, vt( i, 1 ), ldvt, vt( kk, 1 ), ldvt ) + end if + else if( icompq==1 ) then + iq( i ) = i + end if + end do + ! if icompq = 1, use iq(n,1) as the indicator for uplo + if( icompq==1 ) then + if( iuplo==1 ) then + iq( n ) = 1 + else + iq( n ) = 0 + end if + end if + ! if b is lower bidiagonal, update u by those givens rotations + ! which rotated b to be upper bidiagonal + if( ( iuplo==2 ) .and. ( icompq==2 ) )call stdlib_dlasr( 'L', 'V', 'B', n, n, work( 1 )& + , work( n ), u, ldu ) + return + end subroutine stdlib_dbdsdc + + !> DBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**T + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**T*VT instead of + !> P**T, for given real input matrices U and VT. When U and VT are the + !> orthogonal matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by DGEBRD, then + !> A = (U*Q) * S * (P**T*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**T*C + !> for a given real input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_dbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, work, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: hndrth = 0.01_dp + real(dp), parameter :: hndrd = 100.0_dp + real(dp), parameter :: meigth = -0.125_dp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, iterdivn, j, ll, lll, m, maxitdivn, nm1, nm12, & + nm13, oldll, oldm + real(dp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_dlasq1( n, d, e, work, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_dlamch( 'EPSILON' ) + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + work( i ) = cs + work( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_dlasr( 'R', 'V', 'F', nru, n, work( 1 ), work( n ), u,ldu ) + + if( ncc>0 )call stdlib_dlasr( 'L', 'V', 'F', n, ncc, work( 1 ), work( n ), c,ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=dp) ) + thresh = max( tol*sminoa, maxitr*(n*(n*unfl)) ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*(n*(n*unfl)) ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxitdivn = maxitr*n + iterdivn = 0 + iter = -1 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>=n ) then + iter = iter - n + iterdivn = iterdivn + 1 + if( iterdivn>=maxitdivn )go to 200 + end if + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_drot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt, cosr,sinr & + ) + if( nru>0 )call stdlib_drot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + if( ncc>0 )call stdlib_drot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_dlas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_dlas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_dlartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + work( i-ll+1 ) = cs + work( i-ll+1+nm1 ) = sn + work( i-ll+1+nm12 ) = oldcs + work( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_dlasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_dlasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_dlasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_dlartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_dlasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_dlasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_dlasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_dlartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_dlartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_dlasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_dlasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_dlasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_dlartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + work( i-ll ) = cosr + work( i-ll+nm1 ) = -sinr + work( i-ll+nm12 ) = cosl + work( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_dlasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_dlasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_dlasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_dscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_dswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_dswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_dswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_dbdsqr + + !> DGEES: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A matrix is in real Schur form if it is upper quasi-triangular with + !> 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the + !> form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_dgees( jobvs, sort, select, n, a, lda, sdim, wr, wi,vs, ldvs, work, lwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_d) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantst, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, maxwrk, minwrk + real(dp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_dlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need n) + ibal = 1 + call stdlib_dgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_dgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_dlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (workspace: none needed) + call stdlib_dtrsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, s, sep, & + work( iwrk ), lwork-iwrk+1, idum, 1,icond ) + if( icond>0 )info = n + icond + end if + if( wantvs ) then + ! undo balancing + ! (workspace: need n) + call stdlib_dgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_dlascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_dcopy( n, a, lda+1, wr, 1 ) + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi,max( ilo-1, 1 ), & + ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_dswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_dswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_dswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + ! undo scaling for the imaginary part of the eigenvalues + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_dgees + + !> DGEESX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_dp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A real matrix is in real Schur form if it is upper quasi-triangular + !> with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in + !> the form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_dgeesx( jobvs, sort, select, sense, n, a, lda, sdim,wr, wi, vs, ldvs, & + rconde, rcondv, work, lwork,iwork, liwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, liwork, lwork, n + real(dp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_d) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantsb, wantse, wantsn, wantst, & + wantsv, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, liwrk, lwrk, maxwrk, minwrk + real(dp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_dlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (rworkspace: need n) + ibal = 1 + call stdlib_dgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (rworkspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_dgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_dlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (rworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (rworkspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (rworkspace: if sense is not 'n', need n+2*sdim*(n-sdim) + ! otherwise, need n ) + ! (iworkspace: if sense is 'v' or 'b', need sdim*(n-sdim) + ! otherwise, need 0 ) + call stdlib_dtrsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,iwork, liwork, icond ) + if( .not.wantsn )maxwrk = max( maxwrk, n+2*sdim*( n-sdim ) ) + if( icond==-15 ) then + ! not enough real workspace + info = -16 + else if( icond==-17 ) then + ! not enough integer workspace + info = -18 + else if( icond>0 ) then + ! stdlib_dtrsen failed to reorder or to restore standard schur form + info = icond + n + end if + end if + if( wantvs ) then + ! undo balancing + ! (rworkspace: need n) + call stdlib_dgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_dlascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_dcopy( n, a, lda+1, wr, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_dswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_dswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_dswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + if( wantsv .or. wantsb ) then + iwork( 1 ) = max( 1, sdim*( n-sdim ) ) + else + iwork( 1 ) = 1 + end if + return + end subroutine stdlib_dgeesx + + !> DGEEV: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_dgeev( jobvl, jobvr, n, a, lda, wr, wi, vl, ldvl, vr,ldvr, work, lwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: vl(ldvl,*), vr(ldvr,*), wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, itau, iwrk, k, lwork_trevc, maxwrk, & + minwrk, nout + real(dp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_dlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (workspace: need n) + ibal = 1 + call stdlib_dgebal( 'B', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = ibal + n + iwrk = itau + n + call stdlib_dgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_dlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_dlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_dlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'E', 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_dhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 4*n, prefer n + n + 2*n*nb) + call stdlib_dtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (workspace: need n) + call stdlib_dgebak( 'B', 'L', n, ilo, ihi, work( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_dnrm2( n, vl( 1, i ), 1 ) + call stdlib_dscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_dlapy2( stdlib_dnrm2( n, vl( 1, i ), 1 ),stdlib_dnrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_dscal( n, scl, vl( 1, i ), 1 ) + call stdlib_dscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_idamax( n, work( iwrk ), 1 ) + call stdlib_dlartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_drot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (workspace: need n) + call stdlib_dgebak( 'B', 'R', n, ilo, ihi, work( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_dnrm2( n, vr( 1, i ), 1 ) + call stdlib_dscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_dlapy2( stdlib_dnrm2( n, vr( 1, i ), 1 ),stdlib_dnrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_dscal( n, scl, vr( 1, i ), 1 ) + call stdlib_dscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_idamax( n, work( iwrk ), 1 ) + call stdlib_dlartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_drot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info>0 ) then + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_dgeev + + !> DGEEVX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_dp of the LAPACK + !> Users' Guide. + + subroutine stdlib_dgeevx( balanc, jobvl, jobvr, sense, n, a, lda, wr, wi,vl, ldvl, vr, ldvr, & + ilo, ihi, scale, abnrm,rconde, rcondv, work, lwork, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(dp), intent(out) :: abnrm + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: rconde(*), rcondv(*), scale(*), vl(ldvl,*), vr(ldvr,*), wi(*),& + work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(dp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ).or. & + stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) )then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_dlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_dgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_dlange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (workspace: need 2*n, prefer n+n*nb) + itau = 1 + iwrk = itau + n + call stdlib_dgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_dlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_dlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_dlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_dorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_dhseqr( job, 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_dhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 3*n, prefer n + 2*n*nb) + call stdlib_dtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + ! compute condition numbers if desired + ! (workspace: need n*n+6*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_dtrsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, iwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_dgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_dnrm2( n, vl( 1, i ), 1 ) + call stdlib_dscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_dlapy2( stdlib_dnrm2( n, vl( 1, i ), 1 ),stdlib_dnrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_dscal( n, scl, vl( 1, i ), 1 ) + call stdlib_dscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_idamax( n, work, 1 ) + call stdlib_dlartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_drot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_dgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_dnrm2( n, vr( 1, i ), 1 ) + call stdlib_dscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_dlapy2( stdlib_dnrm2( n, vr( 1, i ), 1 ),stdlib_dnrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_dscal( n, scl, vr( 1, i ), 1 ) + call stdlib_dscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_idamax( n, work, 1 ) + call stdlib_dlartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_drot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_dlascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_dgeevx + + !> DGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & + maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd + real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: real,int,log,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + mnthr = stdlib_ilaenv( 6, 'DGELSD', ' ', m, n, nrhs, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns. + mm = n + maxwrk = max( maxwrk, n+n*stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n,-1, -1 ) ) + + maxwrk = max( maxwrk, n+nrhs*stdlib_ilaenv( 1, 'DORMQR', 'LT', m, nrhs, n, -1 ) ) + + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + maxwrk = max( maxwrk, 3*n+( mm+n )*stdlib_ilaenv( 1, 'DGEBRD', ' ', mm, n, -1, -& + 1 ) ) + maxwrk = max( maxwrk, 3*n+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', mm, nrhs, n, -& + 1 ) ) + maxwrk = max( maxwrk, 3*n+( n-1 )*stdlib_ilaenv( 1, 'DORMBR', 'PLN', n, nrhs, n, & + -1 ) ) + wlalsd = 9*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2 + maxwrk = max( maxwrk, 3*n+wlalsd ) + minwrk = max( 3*n+mm, 3*n+nrhs, 3*n+wlalsd ) + end if + if( n>m ) then + wlalsd = 9*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2 + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib_ilaenv( 1, 'DGEBRD', ' ', m, m, -1, -& + 1 ) ) + maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', m, nrhs,& + m, -1 ) ) + maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib_ilaenv( 1, 'DORMBR', 'PLN', m, & + nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m+m+m*nrhs ) + else + maxwrk = max( maxwrk, m*m+2*m ) + end if + maxwrk = max( maxwrk, m+nrhs*stdlib_ilaenv( 1, 'DORMLQ', 'LT', n, nrhs, m, -1 & + ) ) + maxwrk = max( maxwrk, m*m+4*m+wlalsd ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - remaining underdetermined cases. + maxwrk = 3*m + ( n+m )*stdlib_ilaenv( 1, 'DGEBRD', ' ', m, n,-1, -1 ) + maxwrk = max( maxwrk, 3*m+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', m, nrhs, n, & + -1 ) ) + maxwrk = max( maxwrk, 3*m+m*stdlib_ilaenv( 1, 'DORMBR', 'PLN', n, nrhs, m, -1 & + ) ) + maxwrk = max( maxwrk, 3*m+wlalsd ) + end if + minwrk = max( 3*m+nrhs, 3*m+m, 3*m+wlalsd ) + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_dlaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_dlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_dlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure certain entries of b are zero. + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns. + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a. + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_dlalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_dormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m, wlalsd ) ) & + then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4*m+m*lda+& + wlalsd ) )ldwork = lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_dlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il). + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_dlalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_dormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a. + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_dlalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_dormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_dlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_dlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + return + end subroutine stdlib_dgelsd + + !> DGELSS: computes the minimum norm solution to a real linear least + !> squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_dgeqrf, lwork_dormqr, lwork_dgebrd, lwork_dormbr, lwork_dorgbr, & + lwork_dormlq, lwork_dgelqf + real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'DGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_dgeqrf + call stdlib_dgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_dgeqrf=dum(1) + ! compute space needed for stdlib_dormqr + call stdlib_dormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_dormqr=dum(1) + mm = n + maxwrk = max( maxwrk, n + lwork_dgeqrf ) + maxwrk = max( maxwrk, n + lwork_dormqr ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute workspace needed for stdlib_dbdsqr + bdspac = max( 1, 5*n ) + ! compute space needed for stdlib_dgebrd + call stdlib_dgebrd( mm, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, info & + ) + lwork_dgebrd=dum(1) + ! compute space needed for stdlib_dormbr + call stdlib_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_dormbr=dum(1) + ! compute space needed for stdlib_dorgbr + call stdlib_dorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_dorgbr=dum(1) + ! compute total workspace needed + maxwrk = max( maxwrk, 3*n + lwork_dgebrd ) + maxwrk = max( maxwrk, 3*n + lwork_dormbr ) + maxwrk = max( maxwrk, 3*n + lwork_dorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = max( 3*n + mm, 3*n + nrhs, bdspac ) + maxwrk = max( minwrk, maxwrk ) + end if + if( n>m ) then + ! compute workspace needed for stdlib_dbdsqr + bdspac = max( 1, 5*m ) + minwrk = max( 3*m+nrhs, 3*m+n, bdspac ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_dgelqf + call stdlib_dgelqf( m, n, a, lda, dum(1), dum(1),-1, info ) + lwork_dgelqf=dum(1) + ! compute space needed for stdlib_dgebrd + call stdlib_dgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_dgebrd=dum(1) + ! compute space needed for stdlib_dormbr + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_dormbr=dum(1) + ! compute space needed for stdlib_dorgbr + call stdlib_dorgbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_dorgbr=dum(1) + ! compute space needed for stdlib_dormlq + call stdlib_dormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_dormlq=dum(1) + ! compute total workspace needed + maxwrk = m + lwork_dgelqf + maxwrk = max( maxwrk, m*m + 4*m + lwork_dgebrd ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_dormbr ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_dorgbr ) + maxwrk = max( maxwrk, m*m + m + bdspac ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_dormlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_dgebrd + call stdlib_dgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_dgebrd=dum(1) + ! compute space needed for stdlib_dormbr + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_dormbr=dum(1) + ! compute space needed for stdlib_dorgbr + call stdlib_dorgbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_dorgbr=dum(1) + maxwrk = 3*m + lwork_dgebrd + maxwrk = max( maxwrk, 3*m + lwork_dormbr ) + maxwrk = max( maxwrk, 3*m + lwork_dorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_dlaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_dlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_drscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_dlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_dlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_dgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_dlacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_dgemv( 'T', n, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_dcopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of l in work(il) and + ! multiplying b by transpose of left singular vectors + ! (workspace: need m*m+m+bdspac) + call stdlib_dbdsqr( 'U', m, m, 0, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& + ldb, work( iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_drscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_dlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + iwork = ie + ! multiply b by right singular vectors of l in work(il) + ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_dgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & + work( iwork ), ldb ) + call stdlib_dlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_dgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1, i ), ldb,& + zero, work( iwork ), m ) + call stdlib_dlacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_dgemv( 'T', m, m, one, work( il ), ldwork, b( 1, 1 ),1, zero, work( & + iwork ), 1 ) + call stdlib_dcopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'L', m, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_drscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_dlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_dgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_dlacpy( 'F', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_dgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_dlacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_dgemv( 'T', m, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_dcopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_dlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_dlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_dgelss + + !> DGESDD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and right singular + !> vectors. If singular vectors are desired, it uses a + !> divide-and-conquer algorithm. + !> The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**T, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_dgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & + ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl + integer(ilp) :: lwork_dgebrd_mn, lwork_dgebrd_mm, lwork_dgebrd_nn, lwork_dgelqf_mn, & + lwork_dgeqrf_mn, lwork_dorgbr_p_mm, lwork_dorgbr_q_nn, lwork_dorglq_mn, & + lwork_dorglq_nn, lwork_dorgqr_mm, lwork_dorgqr_mn, lwork_dormbr_prt_mm, & + lwork_dormbr_qln_mm, lwork_dormbr_prt_mn, lwork_dormbr_qln_mn, lwork_dormbr_prt_nn, & + lwork_dormbr_qln_nn + real(dp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! compute space needed for stdlib_dbdsdc + if( wntqn ) then + ! stdlib_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) + ! keep 7*n for backwards compatibility. + bdspac = 7*n + else + bdspac = 3*n*n + 4*n + end if + ! compute space preferred for each routine + call stdlib_dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_dgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_dgebrd_nn = int( dum(1),KIND=ilp) + call stdlib_dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_dgeqrf_mn = int( dum(1),KIND=ilp) + call stdlib_dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,ierr ) + lwork_dorgbr_q_nn = int( dum(1),KIND=ilp) + call stdlib_dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_dorgqr_mm = int( dum(1),KIND=ilp) + call stdlib_dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_dorgqr_mn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_dormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_dormbr_qln_nn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_dormbr_qln_mn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_dormbr_qln_mm = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + wrkbl = n + lwork_dgeqrf_mn + wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn ) + maxwrk = max( wrkbl, bdspac + n ) + minwrk = bdspac + n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_dgeqrf_mn + wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + 2*n*n + minwrk = bdspac + 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_dgeqrf_mn + wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = bdspac + n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_dgeqrf_mn + wrkbl = max( wrkbl, n + lwork_dorgqr_mm ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = n*n + max( 3*n + bdspac, n + m ) + end if + else + ! path 5 (m >= n, but not much larger) + wrkbl = 3*n + lwork_dgebrd_mn + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*n + max( m, n*n + bdspac ) + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + end if + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_dbdsdc + if( wntqn ) then + ! stdlib_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) + ! keep 7*n for backwards compatibility. + bdspac = 7*m + else + bdspac = 3*m*m + 4*m + end if + ! compute space preferred for each routine + call stdlib_dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_dgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_dgebrd( m, m, a, m, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_dgebrd_mm = int( dum(1),KIND=ilp) + call stdlib_dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr ) + lwork_dgelqf_mn = int( dum(1),KIND=ilp) + call stdlib_dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_dorglq_nn = int( dum(1),KIND=ilp) + call stdlib_dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr ) + lwork_dorglq_mn = int( dum(1),KIND=ilp) + call stdlib_dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr ) + lwork_dorgbr_p_mm = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_dormbr_prt_mm = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_dormbr_prt_mn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_dormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_dormbr_qln_mm = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + wrkbl = m + lwork_dgelqf_mn + wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm ) + maxwrk = max( wrkbl, bdspac + m ) + minwrk = bdspac + m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_dgelqf_mn + wrkbl = max( wrkbl, m + lwork_dorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + 2*m*m + minwrk = bdspac + 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_dgelqf_mn + wrkbl = max( wrkbl, m + lwork_dorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = bdspac + m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_dgelqf_mn + wrkbl = max( wrkbl, m + lwork_dorglq_nn ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = m*m + max( 3*m + bdspac, m + n ) + end if + else + ! path 5t (n > m, but not much larger) + wrkbl = 3*m + lwork_dgebrd_mn + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*m + max( n, m*m + bdspac ) + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = stdlib_droundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! workspace: need n [tau] + n [work] + ! workspace: prefer n [tau] + n*nb [work] + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out below r + call stdlib_dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + n + ! perform bidiagonal svd, computing singular values only + ! workspace: need n [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz = 'o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is ldwrkr by n + if( lwork >= lda*n + n*n + 3*n + bdspac ) then + ldwrkr = lda + else + ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! work(iu) is n by n + iu = nwork + nwork = iu + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r + ! and vt by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), n, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu ), & + n, zero, work( ir ),ldwrkr ) + call stdlib_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagoal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! workspace: need n*n [r] + call stdlib_dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& + ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! workspace: need n*n [u] + n [tau] + n [work] + ! workspace: prefer n*n [u] + n [tau] + n*nb [work] + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! workspace: need n*n [u] + n [tau] + m [work] + ! workspace: prefer n*n [u] + n [tau] + m*nb [work] + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce r in a, zeroing out other entries + call stdlib_dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! workspace: need n*n [u] + call stdlib_dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& + lda ) + ! copy left singular vectors of a from a to u + call stdlib_dlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr + ! path 5 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + iu = nwork + if( lwork >= m*n + 3*n + bdspac ) then + ! work( iu ) is m by n + ldwrku = m + nwork = iu + ldwrku*n + call stdlib_dlaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) + ! ir is unused; silence compile warnings + ir = -1 + else + ! work( iu ) is n by n + ldwrku = n + nwork = iu + ldwrku*n + ! work(ir) is ldwrkr by n + ir = nwork + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & + dum, idum, work( nwork ),iwork, info ) + ! overwrite vt by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*n + bdspac ) then + ! path 5o-fast + ! overwrite work(iu) by left singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + ! copy left singular vectors of a from work(iu) to a + call stdlib_dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 5o-slow + ! generate q in a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of + ! bidiagonal matrix in work(iu), storing result in + ! work(ir) and copying to a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu )& + , ldwrku, zero,work( ir ), ldwrkr ) + call stdlib_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_dlaset( 'F', m, n, zero, zero, u, ldu ) + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_dlaset( 'F', m, m, zero, zero, u, ldu ) + call stdlib_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of u to identity matrix + if( m>n ) then + call stdlib_dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! workspace: need m [tau] + m [work] + ! workspace: prefer m [tau] + m*nb [work] + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out above l + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + m + ! perform bidiagonal svd, computing singular values only + ! workspace: need m [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ! work(il) is m by m; it is later resized to m by chunk for gemm + il = ivt + m*m + if( lwork >= m*n + m*m + 3*m + bdspac ) then + ldwrkl = m + chunk = n + else + ldwrkl = m + chunk = ( lwork - m*m ) / m + end if + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_dlaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u, and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & + idum, work( nwork ),iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), m,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by q + ! in a, storing result in work(il) and copying to a + ! workspace: need m*m [vt] + m*m [l] + ! workspace: prefer m*m [vt] + m*n [l] + ! at this point, l is resized as m by chunk. + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1, i ), lda,& + zero, work( il ), ldwrkl ) + call stdlib_dlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_dlaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(iu). + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of l and vt + ! by right singular vectors of l + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(il) by + ! q in a, storing result in vt + ! workspace: need m*m [l] + call stdlib_dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! workspace: need m*m [vt] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! workspace: need m*m [vt] + m [tau] + n [work] + ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce l in a, zeroing out other entries + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! workspace: need m*m [vt] + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& + a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_dlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr + ! path 5t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m + bdspac ) then + ! work( ivt ) is m by n + call stdlib_dlaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + ! il is unused; silence compile warnings + il = -1 + else + ! work( ivt ) is m by m + nwork = ivt + ldwkvt*m + il = nwork + ! work(il) is m by chunk + chunk = ( lwork - m*m - 3*m ) / m + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac + call stdlib_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*m + bdspac ) then + ! path 5to-fast + ! overwrite work(ivt) by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] + call stdlib_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! copy right singular vectors of a from work(ivt) to a + call stdlib_dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 5to-slow + ! generate p**t in a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by right singular vectors of + ! bidiagonal matrix in work(ivt), storing result in + ! work(il) and copying to a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_dgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1, & + i ), lda, zero,work( il ), m ) + call stdlib_dlacpy( 'F', m, blk, work( il ), m, a( 1, i ),lda ) + end do + end if + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_dlaset( 'F', m, n, zero, zero, vt, ldvt ) + call stdlib_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_dlaset( 'F', n, n, zero, zero, vt, ldvt ) + call stdlib_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of vt to identity matrix + if( n>m ) then + call stdlib_dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] + call stdlib_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( anrm DGESVD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**T, not V. + + subroutine stdlib_dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m, lwork_dgebrd, & + lwork_dorgbr_p, lwork_dorgbr_q, lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m + real(dp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! compute space needed for stdlib_dbdsqr + mnthr = stdlib_ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*n + ! compute space needed for stdlib_dgeqrf + call stdlib_dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_dgeqrf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorgqr + call stdlib_dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_dorgqr_n = int( dum(1),KIND=ilp) + call stdlib_dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_dorgqr_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dgebrd + call stdlib_dgebrd( n, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_dgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorgbr p + call stdlib_dorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorgbr q + call stdlib_dorgbr( 'Q', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_q = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_dgeqrf + maxwrk = max( maxwrk, 3*n + lwork_dgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*n, bdspac ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n + wrkbl, n*n + m*n + n ) + minwrk = max( 3*n + m, bdspac ) + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n + wrkbl, n*n + m*n + n ) + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_dgeqrf + wrkbl = max( wrkbl, n + lwork_dorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_dgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_dgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*n + lwork_dgebrd + if( wntus .or. wntuo ) then + call stdlib_dorgbr( 'Q', m, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q ) + end if + if( wntua ) then + call stdlib_dorgbr( 'Q', m, m, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*n + m, bdspac ) + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_dbdsqr + mnthr = stdlib_ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*m + ! compute space needed for stdlib_dgelqf + call stdlib_dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_dgelqf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorglq + call stdlib_dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_dorglq_n = int( dum(1),KIND=ilp) + call stdlib_dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr ) + lwork_dorglq_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dgebrd + call stdlib_dgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_dgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorgbr p + call stdlib_dorgbr( 'P', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_dorgbr q + call stdlib_dorgbr( 'Q', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_q = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_dgelqf + maxwrk = max( maxwrk, 3*m + lwork_dgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*m, bdspac ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m + wrkbl, m*m + m*n + m ) + minwrk = max( 3*m + n, bdspac ) + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m + wrkbl, m*m + m*n + m ) + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_dgelqf + wrkbl = max( wrkbl, m + lwork_dorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_dgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_dgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_dgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*m + lwork_dgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_dorgbr p + call stdlib_dorgbr( 'P', m, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p ) + end if + if( wntva ) then + call stdlib_dorgbr( 'P', n, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_dorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*m + n, bdspac ) + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + iwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_dlacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (workspace: need 4*n, prefer 3*n + n*nb) + call stdlib_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,a, lda, dum, 1, & + work( iwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_dlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_dlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,a, lda, dum, & + 1, work( iwork ), info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda + n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n + 4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n + 4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n + bdspac) + call stdlib_dbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + ! copy right singular vectors of r to a + ! (workspace: need n*n) + call stdlib_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n + 4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_dlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_dlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda + n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n + 4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n + 4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n + bdspac) + call stdlib_dbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_dlacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n + 4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_dlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_dlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_dlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) + call stdlib_dlacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*n, prefer 3*n + n*nb) + call stdlib_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + iwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,lda, dum, 1, & + work( iwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_dlacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, dum,& + 1, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_dlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, m, 0, s, work( ie ),work( ir ), ldwrkr, u, & + ldu, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,u, ldu, dum, 1, & + work( iwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda + m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m + 4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m + 4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (workspace: need m*m) + call stdlib_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, compute left + ! singular vectors of a in a and compute right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m + 4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in vt + ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need m*m + 4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_dlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda + m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m + 4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m + 4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_dlacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) + call stdlib_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m + bdspac) + call stdlib_dbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_dlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_dlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) + call stdlib_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) + call stdlib_dlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) + call stdlib_dlacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) + call stdlib_dorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + end if + ! if stdlib_dbdsqr failed to converge, copy unconverged superdiagonals + ! to work( 2:minmn ) + if( info/=0 ) then + if( ie>2 ) then + do i = 1, minmn - 1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + if( ie<2 ) then + do i = minmn - 1, 1, -1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1, work( 2 ),minmn, ierr ) + if( anrm DGESVDQ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_dgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lwork + ! Array Arguments + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: u(ldu,*), v(ldv,*), work(*) + real(dp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ierr, iwoff, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_dgelqf, lwrk_dgesvd, lwrk_dgesvd2, lwrk_dgeqp3, & + lwrk_dgeqrf, lwrk_dormlq, lwrk_dormqr, lwrk_dormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lworq, lworq2, lworlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(dp) :: big, epsln, rtmp, sconda, sfmin + ! Local Arrays + real(dp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + if ( conda ) then + iminwrk = max( 1, n + m - 1 + n ) + else + iminwrk = max( 1, n + m - 1 ) + end if + rminwrk = max( 2, m ) + else + if ( conda ) then + iminwrk = max( 1, n + n ) + else + iminwrk = max( 1, n ) + end if + rminwrk = 2 + end if + lquery = (liwork == -1 .or. lwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=dp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_dlascl('G',0,0,sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_dlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_dlange( 'M', m, n, a, lda, rdummy ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = -8 + call stdlib_xerbla( 'DGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=dp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_dlascl('G',0,0, sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_dgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_dlamch('E') + sfmin = stdlib_dlamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=dp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_dlamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_dlacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_dnrm2( p, v(1,p), 1 ) + call stdlib_dscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_dpocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & + ) + else + call stdlib_dpocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & + ierr ) + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**t = [a](1:nr,1:n)**t + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + do q = p + 1, n + a(q,p) = a(p,q) + if ( q <= nr ) a(p,q) = zero + end do + end do + call stdlib_dgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_dlaset( 'L', nr-1,nr-1, zero,zero, a(2,1), lda ) + call stdlib_dgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_dgesvd to r**t + ! .. copy r**t into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_dlaset( 'U', nr-1,nr-1, zero,zero, u(1,2), ldu ) + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_dgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + else + ! Apply Stdlib_Dgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_dlacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_dlaset( 'L', nr-1, nr-1, zero, zero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_dgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_dlaset('A', m-nr, nr, zero, zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_dlaset( 'A',nr,n1-nr,zero,zero,u(1,nr+1), ldu ) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_dormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_dgesvd to r**t + ! .. copy r**t into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = (a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_dgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_dlapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_dlaset('G', n, n-nr, zero, zero, v(1,nr+1), ldv) + call stdlib_dgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Dgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_dlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_dlaset( 'L', nr-1, nr-1, zero, zero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_dgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_dlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_dlaset('G', n-nr, n, zero,zero, v(nr+1,1), ldv) + call stdlib_dgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the transposed matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_dgesvd to r**t [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite [v], the nr right + ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed + call stdlib_dgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + ! Assemble V + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_dlapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_dlaset('U',nr-1,nr-1, zero,zero, v(1,2),ldv) + + call stdlib_dlaset('A',n,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_dgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + do q = p + 1, n + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_dlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_dlaset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**t into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_dlaset('U',nr-1,nr-1,zero,zero,u(1,nr+2),ldu) + + call stdlib_dgeqrf( n, nr, u(1,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& + n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = u(p,nr+q) + end do + end do + call stdlib_dlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& + ,lwork-n-nr, info ) + call stdlib_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_dormqr('R','C', n, n, nr, u(1,nr+1), ldu,work(n+1),v,ldv,work(& + n+nr+1),lwork-n-nr,ierr) + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_dgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_dlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_dlaset( 'L', nr-1,nr-1, zero,zero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_dgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_dlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_dlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_dlaset('L', nr-1,nr-1, zero,zero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_dlaset('A', n-nr,n, zero,zero, v(nr+1,1),ldv) + call stdlib_dgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the transposed matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_dlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_dlaset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_dlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_dlaset('L',nr-1,nr-1,zero,zero,u(nr+2,1),ldu) + + call stdlib_dgelqf( nr, n, u(nr+1,1), ldu, work(n+1),work(n+nr+1), lwork-n-& + nr, ierr ) + call stdlib_dlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_dlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& + 1), lwork-n-nr, info ) + call stdlib_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_dormlq('R','N',n,n,nr,u(nr+1,1),ldu,work(n+1),v, ldv, work(n+& + nr+1),lwork-n-nr,ierr) + call stdlib_dlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_dlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + end if + end if + ! .. end of the "r**t or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_dormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_dlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_dlaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_dlascl( 'G',0,0, one,sqrt(real(m,KIND=dp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_dgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_dgesvdq + + !> DGGES3: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> DGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_dgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alphar, & + alphai, beta, vsl, ldvsl,vsr, ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_d) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_dggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_dorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_dlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_dgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk,ierr ) + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_dlaqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 50 + end if + ! sort eigenvalues alpha/beta if desired + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_dtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_dggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_dggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_dlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_dlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 50 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_dgges3 + + !> DGGEV3: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_dggev3( jobvl, jobvr, n, a, lda, b, ldb, alphar,alphai, beta, vl, ldvl, vr,& + ldvr, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_dlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_dlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_dlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_dggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_dgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_dormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_dlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_dlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_dorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_dlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_dgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk, ierr ) + else + call stdlib_dgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_dlaqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_dtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_dggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) DHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_dhseqr( job, compz, n, ilo, ihi, h, ldh, wr, wi, z,ldz, work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_dlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_dlahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + ! Local Arrays + real(dp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: i, kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = real( max( 1, n ),KIND=dp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldhnmin ) then + call stdlib_dlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + work, lwork, info ) + else + ! ==== small matrix ==== + call stdlib_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + info ) + if( info>0 ) then + ! ==== a rare stdlib_dlahqr failure! stdlib_dlaqr0 sometimes succeeds + ! . when stdlib_dlahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_dlaqr0 directly. ==== + call stdlib_dlaqr0( wantt, wantz, n, ilo, kbot, h, ldh, wr,wi, ilo, ihi, z,& + ldz, work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_dlaqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_dlaqr0. ==== + call stdlib_dlacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = zero + call stdlib_dlaset( 'A', nl, nl-n, zero, zero, hl( 1, n+1 ),nl ) + call stdlib_dlaqr0( wantt, wantz, nl, ilo, kbot, hl, nl, wr,wi, ilo, ihi, & + z, ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_dlacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_dlaset( 'L', n-2, n-2, zero, zero,& + h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = max( real( max( 1, n ),KIND=dp), work( 1 ) ) + end if + end subroutine stdlib_dhseqr + + !> DLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, DLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by DLALSA. + + pure subroutine stdlib_dlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: b(ldb,*) + real(dp), intent(out) :: bx(ldbx,*), work(*) + real(dp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, lf, ll, lvl, lvl2, nd, ndb1, ndiml, ndimr, & + nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n DLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_dlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: b(ldb,*), d(*), e(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + iwk, j, k, nlvl, nm1, nsize, nsub, nwork, perm, poles, s, sizei, smlszp, sqre, st, st1,& + u, vt, z + real(dp) :: cs, eps, orgnrm, r, rcnd, sn, tol + ! Intrinsic Functions + intrinsic :: abs,real,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_dlaset( 'A', 1, nrhs, zero, zero, b, ldb ) + else + rank = 1 + call stdlib_dlascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_drot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + work( i*2-1 ) = cs + work( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = work( j*2-1 ) + sn = work( j*2 ) + call stdlib_drot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_dlanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_dlaset( 'A', n, nrhs, zero, zero, b, ldb ) + return + end if + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + nwork = 1 + n*n + call stdlib_dlaset( 'A', n, n, zero, one, work, n ) + call stdlib_dlasdq( 'U', 0, n, n, 0, nrhs, d, e, work, n, work, n, b,ldb, work( & + nwork ), info ) + if( info/=0 ) then + return + end if + tol = rcnd*abs( d( stdlib_idamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_dlaset( 'A', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + else + call stdlib_dlascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, work, n, b, ldb, zero,work( nwork ), & + n ) + call stdlib_dlacpy( 'A', n, nrhs, work( nwork ), n, b, ldb ) + ! unscale. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_dlasrt( 'D', n, d, info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=dp) / real( smlsiz+1,KIND=dp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + bx = givnum + 2*nlvl*n + nwork = bx + n*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_dcopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_dcopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_dlasdq. + call stdlib_dlaset( 'A', nsize, nsize, zero, one,work( vt+st1 ), n ) + call stdlib_dlasdq( 'U', 0, nsize, nsize, 0, nrhs, d( st ),e( st ), work( vt+& + st1 ), n, work( nwork ),n, b( st, 1 ), ldb, work( nwork ), info ) + if( info/=0 ) then + return + end if + call stdlib_dlacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_dlasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), work( u+st1 & + ), n, work( vt+st1 ),iwork( k+st1 ), work( difl+st1 ),work( difr+st1 ), work( & + z+st1 ),work( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), n, iwork( & + perm+st1 ),work( givnum+st1 ), work( c+st1 ),work( s+st1 ), work( nwork ), & + iwork( iwk ),info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_dlalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( & + difr+st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+& + st1 ), work( nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_60 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_idamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_dlaset( 'A', 1, nrhs, zero, zero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_dlascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_dcopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + call stdlib_dgemm( 'T', 'N', nsize, nrhs, nsize, one,work( vt+st1 ), n, work( & + bxst ), n, zero,b( st, 1 ), ldb ) + else + call stdlib_dlalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( difr+& + st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( givcol+st1 ),& + n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+st1 ), work( & + nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do + ! unscale and sort the singular values. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_dlasrt( 'D', n, d, info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_dlalsd + + !> DLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_dlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(dp), parameter :: wilk1 = 0.75_dp + real(dp), parameter :: wilk2 = -0.4375_dp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_dlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(dp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,mod + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_dlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_dlaqr3 ==== + call stdlib_dlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_dlaqr5, stdlib_dlaqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=dp) + return + end if + ! ==== stdlib_dlahqr/stdlib_dlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_dlaqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_dlaqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_dlaqr4 or + ! . stdlib_dlahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_dlaqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, work,lwork, inf ) + else + call stdlib_dlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=dp) + end subroutine stdlib_dlaqr0 + + !> Aggressive early deflation: + !> DLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwk3, lwkopt, nmin + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_dgehrd ==== + call stdlib_dgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_dormhr ==== + call stdlib_dormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_dlaqr4 ==== + call stdlib_dlaqr4( .true., .true., jw, 1, jw, t, ldt, sr, si, 1, jw,v, ldv, work, -& + 1, infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=dp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_dlaset( 'A', jw, jw, zero, one, v, ldv ) + nmin = stdlib_ilaenv( 12, 'DLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_dlaqr4( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, work, lwork, infqr ) + else + call stdlib_dlahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, infqr ) + end if + ! ==== stdlib_dtrexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not. bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_dtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_dtrexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_dcopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_dlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_dlarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_dlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_dlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_dgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_dcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_dormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=dp) + end subroutine stdlib_dlaqr3 + + !> DLAQR4: implements one level of recursion for DLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by DLAQR0 and, for large enough + !> deflation window size, it may be called by DLAQR3. This + !> subroutine is identical to DLAQR0 except that it calls DLAQR2 + !> instead of DLAQR3. + !> DLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_dlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(dp), intent(inout) :: h(ldh,*), z(ldz,*) + real(dp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(dp), parameter :: wilk1 = 0.75_dp + real(dp), parameter :: wilk2 = -0.4375_dp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_dlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(dp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,mod + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_dlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_dlaqr2 ==== + call stdlib_dlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_dlaqr5, stdlib_dlaqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=dp) + return + end if + ! ==== stdlib_dlahqr/stdlib_dlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_dlaqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_dlaqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_dlahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_dlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( ks & + ), wi( ks ),1, 1, zdum, 1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=dp) + end subroutine stdlib_dlaqr4 + + !> DLAQZ0: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by DGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_dlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & + alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& + * ),alphai( * ), beta( * ), work( * ) + + ! local scalars + real(dp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost, i + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_dhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& + beta, q, ldq, z, ldz, work,lwork, info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_dlaqz3 + nw = max( nwr, nmin ) + call stdlib_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_dlaqz4 + call stdlib_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & + lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=dp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_dlaset( 'FULL', n, n, zero, one, q, ldq ) + if( iwantz==3 ) call stdlib_dlaset( 'FULL', n, n, zero, one, z, ldz ) + ! get machine constants + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp)/ulp ) + istart = ilo + istop = ihi + maxit = 3*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& + abs( a( istop-2,istop-2 ) ) ) ) ) then + a( istop-1, istop-2 ) = zero + istop = istop-2 + ld = 0 + eshift = zero + else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& + abs( a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = zero + istop = istop-1 + ld = 0 + eshift = zero + end if + ! check deflations at the start + if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & + ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then + a( istart+2, istart+1 ) = zero + istart = istart+2 + ld = 0 + eshift = zero + else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& + )+abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = zero + istart = istart+1 + ld = 0 + eshift = zero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = zero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + temp = zero + if( k < istop ) then + temp = temp+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + temp = temp+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_dlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = zero + call stdlib_drot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_drot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_drot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = zero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using stdlib_dhgeqz because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_dlaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2+1 ),& + nw, work( 2*nw**2+1 ), lwork-2*nw**2, rec,aed_info ) + if ( n_deflated > 0 ) then + istop = istop-n_deflated + ld = 0 + eshift = zero + end if + if ( 100*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_deflated-n_undeflated+1 + ! shuffle shifts to put double shifts in front + ! this ensures that we don't split up a double shift + do i = shiftpos, shiftpos+n_undeflated-1, 2 + if( alphai( i )/=-alphai( i+1 ) ) then + swap = alphar( i ) + alphar( i ) = alphar( i+1 ) + alphar( i+1 ) = alphar( i+2 ) + alphar( i+2 ) = swap + swap = alphai( i ) + alphai( i ) = alphai( i+1 ) + alphai( i+1 ) = alphai( i+2 ) + alphai( i+2 ) = swap + swap = beta( i ) + beta( i ) = beta( i+1 ) + beta( i+1 ) = beta( i+2 ) + beta( i+2 ) = swap + end if + end do + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) ) DLAQZ3: performs AED + + recursive subroutine stdlib_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& + * ),alphai( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + real(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + real(dp), intent(out) :: work(*) + + ! local scalars + logical(lk) :: bulge + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(dp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = zero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_dtgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & + work, -1, dtgexc_info ) + lworkreq = int( work( 1 ),KIND=ilp) + call stdlib_dlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1, rec+1, qz_small_info ) + lworkreq = max( lworkreq, int( work( 1 ),KIND=ilp)+2*jw**2 ) + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ3', -info ) + return + end if + ! get machine constants + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alphar( kwtop ) = a( kwtop, kwtop ) + alphai( kwtop ) = zero + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = zero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_dlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_dlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_dlaset( 'FULL', jw, jw, zero, one, qc, ldqc ) + call stdlib_dlaset( 'FULL', jw, jw, zero, one, zc, ldzc ) + call stdlib_dlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2*jw**2+1 ), lwork-2*jw**2,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_dlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_dlacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == zero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + bulge = .false. + if ( kwbot-kwtop+1 >= 2 ) then + bulge = a( kwbot, kwbot-1 ) /= zero + end if + if ( bulge ) then + ! try to deflate complex conjugate eigenvalue pair + temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & + a( kwbot-1, kwbot ) ) ) + if( temp == zero )then + temp = abs( s ) + end if + if ( max( abs( s*qc( 1, kwbot-kwtop ) ), abs( s*qc( 1,kwbot-kwtop+1 ) ) ) <= & + max( smlnum,ulp*temp ) ) then + ! deflatable + kwbot = kwbot-2 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) + + k2 = k2+2 + end if + k = k+2 + else + ! try to deflate real eigenvalue + temp = abs( a( kwbot, kwbot ) ) + if( temp == zero ) then + temp = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) + + k2 = k2+1 + end if + k = k+1 + end if + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + bulge = .false. + if ( k < ihi ) then + if ( a( k+1, k ) /= zero ) then + bulge = .true. + end if + end if + if ( bulge ) then + ! 2x2 eigenvalue block + call stdlib_dlag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& + alphar( k ),alphar( k+1 ), alphai( k ) ) + alphai( k+1 ) = -alphai( k ) + k = k+2 + else + ! 1x1 eigenvalue block + alphar( k ) = a( k, k ) + alphai( k ) = zero + beta( k ) = b( k, k ) + k = k+1 + end if + end do + if ( kwtop /= ilo .and. s /= zero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1,1:jw-nd ) + do k = kwbot-1, kwtop, -1 + call stdlib_dlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = zero + k2 = max( kwtop, k-1 ) + call stdlib_drot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_drot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_drot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, s1 ) + + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then + ! move double pole block down and remove it + do k2 = k-1, kwbot-2 + call stdlib_dlaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& + ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) + end do + k = k-2 + else + ! k points to single shift + do k2 = k, kwbot-2 + ! move shift down + call stdlib_dlartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) + b( k2+1, k2+1 ) = temp + b( k2+1, k2 ) = zero + call stdlib_drot( k2+2-istartm+1, a( istartm, k2+1 ), 1,a( istartm, k2 ), & + 1, c1, s1 ) + call stdlib_drot( k2-istartm+1, b( istartm, k2+1 ), 1,b( istartm, k2 ), 1, & + c1, s1 ) + call stdlib_drot( jw, zc( 1, k2+1-kwtop+1 ), 1, zc( 1,k2-kwtop+1 ), 1, c1, & + s1 ) + call stdlib_dlartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) + a( k2+1, k2 ) = temp + a( k2+2, k2 ) = zero + call stdlib_drot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& + s1 ) + call stdlib_drot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& + s1 ) + call stdlib_drot( jw, qc( 1, k2+1-kwtop+1 ), 1, qc( 1,k2+2-kwtop+1 ), 1, & + c1, s1 ) + end do + ! remove the shift + call stdlib_dlartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) + + b( kwbot, kwbot ) = temp + b( kwbot, kwbot-1 ) = zero + call stdlib_drot( kwbot-istartm, b( istartm, kwbot ), 1,b( istartm, kwbot-1 ),& + 1, c1, s1 ) + call stdlib_drot( kwbot-istartm+1, a( istartm, kwbot ), 1,a( istartm, kwbot-1 & + ), 1, c1, s1 ) + call stdlib_drot( jw, zc( 1, kwbot-kwtop+1 ), 1, zc( 1,kwbot-1-kwtop+1 ), 1, & + c1, s1 ) + k = k-1 + end if + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & + lda, zero, work, jw ) + call stdlib_dlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, zero, work, jw ) + call stdlib_dlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_dgemm( 'N', 'N', n, jw, jw, one, q( 1, kwtop ), ldq, qc,ldqc, zero, & + work, n ) + call stdlib_dlacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & + ), lda ) + call stdlib_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & + ), ldb ) + end if + if ( ilz ) then + call stdlib_dgemm( 'N', 'N', n, jw, jw, one, z( 1, kwtop ), ldz, zc,ldzc, zero, & + work, n ) + call stdlib_dlacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_dlaqz3 + + !> To find the desired eigenvalues of a given real symmetric + !> tridiagonal matrix T, DLARRE: sets any "small" off-diagonal + !> elements to zero, and for each unreduced block T_i, it finds + !> (a) a suitable shift at one end of the block's spectrum, + !> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and + !> (c) eigenvalues of each L_i D_i L_i^T. + !> The representations and eigenvalues found are then used by + !> DSTEMR to compute the eigenvectors of T. + !> The accuracy varies depending on whether bisection is used to + !> find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to + !> conpute all and then discard any unwanted one. + !> As an added benefit, DLARRE also outputs the n + !> Gerschgorin intervals for the matrices L_i D_i L_i^T. + + pure subroutine stdlib_dlarre( range, n, vl, vu, il, iu, d, e, e2,rtol1, rtol2, spltol, & + nsplit, isplit, m,w, werr, wgap, iblock, indexw, gers, pivmin,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: range + integer(ilp), intent(in) :: il, iu, n + integer(ilp), intent(out) :: info, m, nsplit + real(dp), intent(out) :: pivmin + real(dp), intent(in) :: rtol1, rtol2, spltol + real(dp), intent(inout) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), isplit(*), iwork(*), indexw(*) + real(dp), intent(inout) :: d(*), e(*), e2(*) + real(dp), intent(out) :: gers(*), w(*), werr(*), wgap(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: hndrd = 100.0_dp + real(dp), parameter :: pert = 8.0_dp + real(dp), parameter :: fourth = one/four + real(dp), parameter :: fac = half + real(dp), parameter :: maxgrowth = 64.0_dp + real(dp), parameter :: fudge = 2.0_dp + integer(ilp), parameter :: maxtry = 6 + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: indrng = 2 + integer(ilp), parameter :: valrng = 3 + + + ! Local Scalars + logical(lk) :: forceb, norep, usedqd + integer(ilp) :: cnt, cnt1, cnt2, i, ibegin, idum, iend, iinfo, in, indl, indu, irange, & + j, jblk, mb, mm, wbegin, wend + real(dp) :: avgap, bsrtol, clwdth, dmax, dpivot, eabs, emax, eold, eps, gl, gu, isleft,& + isrght, rtl, rtol, s1, s2, safmin, sgndef, sigma, spdiam, tau, tmp, tmp1 + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + end if + m = 0 + ! get machine constants + safmin = stdlib_dlamch( 'S' ) + eps = stdlib_dlamch( 'P' ) + ! set parameters + rtl = sqrt(eps) + bsrtol = sqrt(eps) + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + wgap(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + gers(1) = d( 1 ) + gers(2) = d( 1 ) + endif + ! store the shift for the initial rrr, which is zero in this case + e(1) = zero + return + end if + ! general case: tridiagonal matrix of order > 1 + ! init werr, wgap. compute gerschgorin intervals and spectral diameter. + ! compute maximum off-diagonal entry and pivmin. + gl = d(1) + gu = d(1) + eold = zero + emax = zero + e(n) = zero + do i = 1,n + werr(i) = zero + wgap(i) = zero + eabs = abs( e(i) ) + if( eabs >= emax ) then + emax = eabs + end if + tmp1 = eabs + eold + gers( 2*i-1) = d(i) - tmp1 + gl = min( gl, gers( 2*i - 1)) + gers( 2*i ) = d(i) + tmp1 + gu = max( gu, gers(2*i) ) + eold = eabs + end do + ! the minimum pivot allowed in the sturm sequence for t + pivmin = safmin * max( one, emax**2 ) + ! compute spectral diameter. the gerschgorin bounds give an + ! estimate that is wrong by at most a factor of sqrt(2) + spdiam = gu - gl + ! compute splitting points + call stdlib_dlarra( n, d, e, e2, spltol, spdiam,nsplit, isplit, iinfo ) + ! can force use of bisection instead of faster dqds. + ! option left in the code for future multisection work. + forceb = .false. + ! initialize usedqd, dqds should be used for allrng unless someone + ! explicitly wants bisection. + usedqd = (( irange==allrng ) .and. (.not.forceb)) + if( (irange==allrng) .and. (.not. forceb) ) then + ! set interval [vl,vu] that contains all eigenvalues + vl = gl + vu = gu + else + ! we call stdlib_dlarrd to find crude approximations to the eigenvalues + ! in the desired range. in case irange = indrng, we also obtain the + ! interval (vl,vu] that contains all the wanted eigenvalues. + ! an interval [left,right] has converged if + ! right-leftvl ).and.( d( & + ibegin )<=vu ) ).or. ( (irange==indrng).and.(iblock(wbegin)==jblk))) then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + wgap(m) = zero + iblock( m ) = jblk + indexw( m ) = 1 + wbegin = wbegin + 1 + endif + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + end if + ! blocks of size larger than 1x1 + ! e( iend ) will hold the shift for the initial rrr, for now set it =0 + e( iend ) = zero + ! find local outer bounds gl,gu for the block + gl = d(ibegin) + gu = d(ibegin) + do i = ibegin , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + if(.not. ((irange==allrng).and.(.not.forceb)) ) then + ! count the number of eigenvalues in the current block. + mb = 0 + do i = wbegin,mm + if( iblock(i)==jblk ) then + mb = mb+1 + else + goto 21 + endif + end do + 21 continue + if( mb==0) then + ! no eigenvalue in the current block lies in the desired range + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + else + ! decide whether dqds or bisection is more efficient + usedqd = ( (mb > fac*in) .and. (.not.forceb) ) + wend = wbegin + mb - 1 + ! calculate gaps for the current block + ! in later stages, when representations for individual + ! eigenvalues are different, we use sigma = e( iend ). + sigma = zero + do i = wbegin, wend - 1 + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( wend ) = max( zero,vu - sigma - (w( wend )+werr( wend ))) + ! find local index of the first and last desired evalue. + indl = indexw(wbegin) + indu = indexw( wend ) + endif + endif + if(( (irange==allrng) .and. (.not. forceb) ).or.usedqd) then + ! case of dqds + ! find approximations to the extremal eigenvalues of the block + call stdlib_dlarrk( in, 1, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1, & + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isleft = max(gl, tmp - tmp1- hndrd * eps* abs(tmp - tmp1)) + call stdlib_dlarrk( in, in, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1,& + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isrght = min(gu, tmp + tmp1+ hndrd * eps * abs(tmp + tmp1)) + ! improve the estimate of the spectral diameter + spdiam = isrght - isleft + else + ! case of bisection + ! find approximations to the wanted extremal eigenvalues + isleft = max(gl, w(wbegin) - werr(wbegin)- hndrd * eps*abs(w(wbegin)- werr(& + wbegin) )) + isrght = min(gu,w(wend) + werr(wend)+ hndrd * eps * abs(w(wend)+ werr(wend))) + + endif + ! decide whether the base representation for the current block + ! l_jblk d_jblk l_jblk^t = t_jblk - sigma_jblk i + ! should be on the left or the right end of the current block. + ! the strategy is to shift to the end which is "more populated" + ! furthermore, decide whether to use dqds for the computation of + ! dqds is chosen if all eigenvalues are desired or the number of + ! eigenvalues to be computed is large compared to the blocksize. + if( ( irange==allrng ) .and. (.not.forceb) ) then + ! if all the eigenvalues have to be computed, we use dqd + usedqd = .true. + ! indl is the local index of the first eigenvalue to compute + indl = 1 + indu = in + ! mb = number of eigenvalues to compute + mb = in + wend = wbegin + mb - 1 + ! define 1/4 and 3/4 points of the spectrum + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + ! stdlib_dlarrd has computed iblock and indexw for each eigenvalue + ! approximation. + ! choose sigma + if( usedqd ) then + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + tmp = min(isrght,vu) - max(isleft,vl) + s1 = max(isleft,vl) + fourth * tmp + s2 = min(isrght,vu) - fourth * tmp + endif + endif + ! compute the negcount at the 1/4 and 3/4 points + if(mb>1) then + call stdlib_dlarrc( 'T', in, s1, s2, d(ibegin),e(ibegin), pivmin, cnt, cnt1, & + cnt2, iinfo) + endif + if(mb==1) then + sigma = gl + sgndef = one + elseif( cnt1 - indl >= indu - cnt2 ) then + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = max(isleft,gl) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get pos def matrix + ! for dqds + sigma = isleft + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = max(isleft,vl) + endif + sgndef = one + else + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = min(isrght,gu) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get neg def matrix + ! for dqds + sigma = isrght + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = min(isrght,vu) + endif + sgndef = -one + endif + ! an initial sigma has been chosen that will be used for computing + ! t - sigma i = l d l^t + ! define the increment tau of the shift in case the initial shift + ! needs to be refined to obtain a factorization with not too much + ! element growth. + if( usedqd ) then + ! the initial sigma was to the outer end of the spectrum + ! the matrix is definite and we need not retreat. + tau = spdiam*eps*n + two*pivmin + tau = max( tau,two*eps*abs(sigma) ) + else + if(mb>1) then + clwdth = w(wend) + werr(wend) - w(wbegin) - werr(wbegin) + avgap = abs(clwdth / real(wend-wbegin,KIND=dp)) + if( sgndef==one ) then + tau = half*max(wgap(wbegin),avgap) + tau = max(tau,werr(wbegin)) + else + tau = half*max(wgap(wend-1),avgap) + tau = max(tau,werr(wend)) + endif + else + tau = werr(wbegin) + endif + endif + loop_80: do idum = 1, maxtry + ! compute l d l^t factorization of tridiagonal matrix t - sigma i. + ! store d in work(1:in), l in work(in+1:2*in), and reciprocals of + ! pivots in work(2*in+1:3*in) + dpivot = d( ibegin ) - sigma + work( 1 ) = dpivot + dmax = abs( work(1) ) + j = ibegin + do i = 1, in - 1 + work( 2*in+i ) = one / work( i ) + tmp = e( j )*work( 2*in+i ) + work( in+i ) = tmp + dpivot = ( d( j+1 )-sigma ) - tmp*e( j ) + work( i+1 ) = dpivot + dmax = max( dmax, abs(dpivot) ) + j = j + 1 + end do + ! check for element growth + if( dmax > maxgrowth*spdiam ) then + norep = .true. + else + norep = .false. + endif + if( usedqd .and. .not.norep ) then + ! ensure the definiteness of the representation + ! all entries of d (of l d l^t) must have the same sign + do i = 1, in + tmp = sgndef*work( i ) + if( tmp1 ) then + ! perturb each entry of the base representation by a small + ! (but random) relative amount to overcome difficulties with + ! glued matrices. + do i = 1, 4 + iseed( i ) = 1 + end do + call stdlib_dlarnv(2, iseed, 2*in-1, work(1)) + do i = 1,in-1 + d(ibegin+i-1) = d(ibegin+i-1)*(one+eps*pert*work(i)) + e(ibegin+i-1) = e(ibegin+i-1)*(one+eps*pert*work(in+i)) + end do + d(iend) = d(iend)*(one+eps*four*work(in)) + endif + ! don't update the gerschgorin intervals because keeping track + ! of the updates would be too much work in stdlib_dlarrv. + ! we update w instead and use it to locate the proper gerschgorin + ! intervals. + ! compute the required eigenvalues of l d l' by bisection or dqds + if ( .not.usedqd ) then + ! if stdlib_dlarrd has been used, shift the eigenvalue approximations + ! according to their representation. this is necessary for + ! a uniform stdlib_dlarrv since dqds computes eigenvalues of the + ! shifted representation. in stdlib_dlarrv, w will always hold the + ! unshifted eigenvalue approximation. + do j=wbegin,wend + w(j) = w(j) - sigma + werr(j) = werr(j) + abs(w(j)) * eps + end do + ! call stdlib_dlarrb to reduce eigenvalue error of the approximations + ! from stdlib_dlarrd + do i = ibegin, iend-1 + work( i ) = d( i ) * e( i )**2 + end do + ! use bisection to find ev from indl to indu + call stdlib_dlarrb(in, d(ibegin), work(ibegin),indl, indu, rtol1, rtol2, indl-1,& + w(wbegin), wgap(wbegin), werr(wbegin),work( 2*n+1 ), iwork, pivmin, spdiam,in, & + iinfo ) + if( iinfo /= 0 ) then + info = -4 + return + end if + ! stdlib_dlarrb computes all gaps correctly except for the last one + ! record distance to vu/gu + wgap( wend ) = max( zero,( vu-sigma ) - ( w( wend ) + werr( wend ) ) ) + do i = indl, indu + m = m + 1 + iblock(m) = jblk + indexw(m) = i + end do + else + ! call dqds to get all eigs (and then possibly delete unwanted + ! eigenvalues). + ! note that dqds finds the eigenvalues of the l d l^t representation + ! of t to high relative accuracy. high relative accuracy + ! might be lost when the shift of the rrr is subtracted to obtain + ! the eigenvalues of t. however, t is not guaranteed to define its + ! eigenvalues to high relative accuracy anyway. + ! set rtol to the order of the tolerance used in stdlib_dlasq2 + ! this is an estimated error, the worst case bound is 4*n*eps + ! which is usually too large and requires unnecessary work to be + ! done by bisection when computing the eigenvectors + rtol = log(real(in,KIND=dp)) * four * eps + j = ibegin + do i = 1, in - 1 + work( 2*i-1 ) = abs( d( j ) ) + work( 2*i ) = e( j )*e( j )*work( 2*i-1 ) + j = j + 1 + end do + work( 2*in-1 ) = abs( d( iend ) ) + work( 2*in ) = zero + call stdlib_dlasq2( in, work, iinfo ) + if( iinfo /= 0 ) then + ! if iinfo = -5 then an index is part of a tight cluster + ! and should be changed. the index is in iwork(1) and the + ! gap is in work(n+1) + info = -5 + return + else + ! test that all eigenvalues are positive as expected + do i = 1, in + if( work( i )zero ) then + do i = indl, indu + m = m + 1 + w( m ) = work( in-i+1 ) + iblock( m ) = jblk + indexw( m ) = i + end do + else + do i = indl, indu + m = m + 1 + w( m ) = -work( i ) + iblock( m ) = jblk + indexw( m ) = i + end do + end if + do i = m - mb + 1, m + ! the value of rtol below should be the tolerance in stdlib_dlasq2 + werr( i ) = rtol * abs( w(i) ) + end do + do i = m - mb + 1, m - 1 + ! compute the right gap between the intervals + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( m ) = max( zero,( vu-sigma ) - ( w( m ) + werr( m ) ) ) + end if + ! proceed with next block + ibegin = iend + 1 + wbegin = wend + 1 + end do loop_170 + return + end subroutine stdlib_dlarre + + !> Using a divide and conquer approach, DLASD0: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M + !> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + !> The algorithm computes orthogonal matrices U and VT such that + !> B = U * S * VT. The singular values S are overwritten on D. + !> A related subroutine, DLASDA, computes only the singular values, + !> and optionally, the singular vectors in compact form. + + pure subroutine stdlib_dlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n, smlsiz, sqre + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei + real(dp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + end if + m = n + sqre + if( ldu Using a divide and conquer approach, DLASDA: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + !> B with diagonal D and offdiagonal E, where M = N + SQRE. The + !> algorithm computes the singular values in the SVD B = U * S * VT. + !> The orthogonal matrices U and VT are optionally computed in + !> compact form. + !> A related subroutine, DLASD0, computes the singular values and + !> the singular vectors in explicit form. + + pure subroutine stdlib_dlasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & + poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& + *) + real(dp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & + s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) + real(dp), intent(inout) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & + m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & + nwork2, smlszp, sqrei, vf, vfi, vl, vli + real(dp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldu<( n+sqre ) ) then + info = -8 + else if( ldgcol DLASDQ: computes the singular value decomposition (SVD) of a real + !> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + !> E, accumulating the transformations if desired. Letting B denote + !> the input bidiagonal matrix, the algorithm computes orthogonal + !> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose + !> of P). The singular values S are overwritten on D. + !> The input matrix U is changed to U * Q if desired. + !> The input matrix VT is changed to P**T * VT if desired. + !> The input matrix C is changed to Q**T * C if desired. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3, for a detailed description of the algorithm. + + pure subroutine stdlib_dlasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre + ! Array Arguments + real(dp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: rotate + integer(ilp) :: i, isub, iuplo, j, np1, sqre1 + real(dp) :: cs, r, smin, sn + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( iuplo==0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncvt<0 ) then + info = -4 + else if( nru<0 ) then + info = -5 + else if( ncc<0 ) then + info = -6 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + np1 = n + 1 + sqre1 = sqre + ! if matrix non-square upper bidiagonal, rotate to be lower + ! bidiagonal. the rotations are on the right. + if( ( iuplo==1 ) .and. ( sqre1==1 ) ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + call stdlib_dlartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + e( n ) = zero + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + iuplo = 2 + sqre1 = 0 + ! update singular vectors if desired. + if( ncvt>0 )call stdlib_dlasr( 'L', 'V', 'F', np1, ncvt, work( 1 ),work( np1 ), vt, & + ldvt ) + end if + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left. + if( iuplo==2 ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + ! if matrix (n+1)-by-n lower bidiagonal, one additional + ! rotation is needed. + if( sqre1==1 ) then + call stdlib_dlartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + end if + ! update singular vectors if desired. + if( nru>0 ) then + if( sqre1==0 ) then + call stdlib_dlasr( 'R', 'V', 'F', nru, n, work( 1 ),work( np1 ), u, ldu ) + + else + call stdlib_dlasr( 'R', 'V', 'F', nru, np1, work( 1 ),work( np1 ), u, ldu ) + + end if + end if + if( ncc>0 ) then + if( sqre1==0 ) then + call stdlib_dlasr( 'L', 'V', 'F', n, ncc, work( 1 ),work( np1 ), c, ldc ) + + else + call stdlib_dlasr( 'L', 'V', 'F', np1, ncc, work( 1 ),work( np1 ), c, ldc ) + + end if + end if + end if + ! call stdlib_dbdsqr to compute the svd of the reduced real + ! n-by-n upper bidiagonal matrix. + call stdlib_dbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) + + ! sort the singular values into ascending order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n + ! scan for smallest d(i). + isub = i + smin = d( i ) + do j = i + 1, n + if( d( j )0 )call stdlib_dswap( ncvt, vt( isub, 1 ), ldvt, vt( i, 1 ), ldvt ) + + if( nru>0 )call stdlib_dswap( nru, u( 1, isub ), 1, u( 1, i ), 1 ) + if( ncc>0 )call stdlib_dswap( ncc, c( isub, 1 ), ldc, c( i, 1 ), ldc ) + end if + end do + return + end subroutine stdlib_dlasdq + + !> DLASQ1: computes the singular values of a real N-by-N bidiagonal + !> matrix with diagonal D and off-diagonal E. The singular values + !> are computed to high relative accuracy, in the absence of + !> denormalization, underflow and overflow. The algorithm was first + !> presented in + !> "Accurate singular values and differential qd algorithms" by K. V. + !> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + !> 1994, + !> and the present implementation is described in "An implementation of + !> the dqds Algorithm (Positive Case)", LAPACK Working Note. + + pure subroutine stdlib_dlasq1( n, d, e, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo + real(dp) :: eps, scale, safmin, sigmn, sigmx + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLASQ1', -info ) + return + else if( n==0 ) then + return + else if( n==1 ) then + d( 1 ) = abs( d( 1 ) ) + return + else if( n==2 ) then + call stdlib_dlas2( d( 1 ), e( 1 ), d( 2 ), sigmn, sigmx ) + d( 1 ) = sigmx + d( 2 ) = sigmn + return + end if + ! estimate the largest singular value. + sigmx = zero + do i = 1, n - 1 + d( i ) = abs( d( i ) ) + sigmx = max( sigmx, abs( e( i ) ) ) + end do + d( n ) = abs( d( n ) ) + ! early return if sigmx is zero (matrix is already diagonal). + if( sigmx==zero ) then + call stdlib_dlasrt( 'D', n, d, iinfo ) + return + end if + do i = 1, n + sigmx = max( sigmx, d( i ) ) + end do + ! copy d and e into work (in the z format) and scale (squaring the + ! input data makes scaling by a power of the radix pointless). + eps = stdlib_dlamch( 'PRECISION' ) + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + scale = sqrt( eps / safmin ) + call stdlib_dcopy( n, d, 1, work( 1 ), 2 ) + call stdlib_dcopy( n-1, e, 1, work( 2 ), 2 ) + call stdlib_dlascl( 'G', 0, 0, sigmx, scale, 2*n-1, 1, work, 2*n-1,iinfo ) + ! compute the q's and e's. + do i = 1, 2*n - 1 + work( i ) = work( i )**2 + end do + work( 2*n ) = zero + call stdlib_dlasq2( n, work, info ) + if( info==0 ) then + do i = 1, n + d( i ) = sqrt( work( i ) ) + end do + call stdlib_dlascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + else if( info==2 ) then + ! maximum number of iterations exceeded. move data from work + ! into d and e so the calling subroutine can try to finish + do i = 1, n + d( i ) = sqrt( work( 2*i-1 ) ) + e( i ) = sqrt( work( 2*i ) ) + end do + call stdlib_dlascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + call stdlib_dlascl( 'G', 0, 0, scale, sigmx, n, 1, e, n, iinfo ) + end if + return + end subroutine stdlib_dlasq1 + + !> DLASQ2: computes all the eigenvalues of the symmetric positive + !> definite tridiagonal matrix associated with the qd array Z to high + !> relative accuracy are computed to high relative accuracy, in the + !> absence of denormalization, underflow and overflow. + !> To see the relation of Z to the tridiagonal matrix, let L be a + !> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + !> let U be an upper bidiagonal matrix with 1's above and diagonal + !> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + !> symmetric tridiagonal to which it is similar. + !> Note : DLASQ2 defines a logical variable, IEEE, which is true + !> on machines which follow ieee-754 floating-point standard in their + !> handling of infinities and NaNs, and false otherwise. This variable + !> is passed to DLASQ3. + + pure subroutine stdlib_dlasq2( n, z, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: cbias = 1.50_dp + real(dp), parameter :: hundrd = 100.0_dp + + + ! Local Scalars + logical(lk) :: ieee + integer(ilp) :: i0, i1, i4, iinfo, ipn4, iter, iwhila, iwhilb, k, kmin, n0, n1, nbig, & + ndiv, nfail, pp, splt, ttype + real(dp) :: d, dee, deemin, desig, dmin, dmin1, dmin2, dn, dn1, dn2, e, emax, emin, & + eps, g, oldemn, qmax, qmin, s, safmin, sigma, t, tau, temp, tol, tol2, trace, zmax, & + tempe, tempq + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + ! test the input arguments. + ! (in case stdlib_dlasq2 is not called by stdlib_dlasq1) + info = 0 + eps = stdlib_dlamch( 'PRECISION' ) + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + tol = eps*hundrd + tol2 = tol**2 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLASQ2', 1 ) + return + else if( n==0 ) then + return + else if( n==1 ) then + ! 1-by-1 case. + if( z( 1 )z( 1 ) ) then + d = z( 3 ) + z( 3 ) = z( 1 ) + z( 1 ) = d + end if + z( 5 ) = z( 1 ) + z( 2 ) + z( 3 ) + if( z( 2 )>z( 3 )*tol2 ) then + t = half*( ( z( 1 )-z( 3 ) )+z( 2 ) ) + s = z( 3 )*( z( 2 ) / t ) + if( s<=t ) then + s = z( 3 )*( z( 2 ) / ( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( 3 )*( z( 2 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( 1 ) + ( s+z( 2 ) ) + z( 3 ) = z( 3 )*( z( 1 ) / t ) + z( 1 ) = t + end if + z( 2 ) = z( 3 ) + z( 6 ) = z( 2 ) + z( 1 ) + return + end if + ! check for negative data and compute sums of q's and e's. + z( 2*n ) = zero + emin = z( 2 ) + qmax = zero + zmax = zero + d = zero + e = zero + do k = 1, 2*( n-1 ), 2 + if( z( k )i0 ) then + emin = abs( z( 4*n0-5 ) ) + else + emin = zero + end if + qmin = z( 4*n0-3 ) + qmax = qmin + do i4 = 4*n0, 8, -4 + if( z( i4-5 )<=zero )go to 100 + if( qmin>=four*emax ) then + qmin = min( qmin, z( i4-3 ) ) + emax = max( emax, z( i4-5 ) ) + end if + qmax = max( qmax, z( i4-7 )+z( i4-5 ) ) + emin = min( emin, z( i4-5 ) ) + end do + i4 = 4 + 100 continue + i0 = i4 / 4 + pp = 0 + if( n0-i0>1 ) then + dee = z( 4*i0-3 ) + deemin = dee + kmin = i0 + do i4 = 4*i0+1, 4*n0-3, 4 + dee = z( i4 )*( dee /( dee+z( i4-2 ) ) ) + if( dee<=deemin ) then + deemin = dee + kmin = ( i4+3 )/4 + end if + end do + if( (kmin-i0)*2n0 )go to 150 + ! while submatrix unfinished take a good dqds step. + call stdlib_dlasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + pp = 1 - pp + ! when emin is very small check for splits. + if( pp==0 .and. n0-i0>=3 ) then + if( z( 4*n0 )<=tol2*qmax .or.z( 4*n0-1 )<=tol2*sigma ) then + splt = i0 - 1 + qmax = z( 4*i0-3 ) + emin = z( 4*i0-1 ) + oldemn = z( 4*i0 ) + do i4 = 4*i0, 4*( n0-3 ), 4 + if( z( i4 )<=tol2*z( i4-3 ) .or.z( i4-1 )<=tol2*sigma ) then + z( i4-1 ) = -sigma + splt = i4 / 4 + qmax = zero + emin = z( i4+3 ) + oldemn = z( i4+4 ) + else + qmax = max( qmax, z( i4+1 ) ) + emin = min( emin, z( i4-1 ) ) + oldemn = min( oldemn, z( i4 ) ) + end if + end do + z( 4*n0-1 ) = emin + z( 4*n0 ) = oldemn + i0 = splt + 1 + end if + end if + end do loop_140 + info = 2 + ! maximum number of iterations exceeded, restore the shift + ! sigma and place the new d's and e's in a qd array. + ! this might need to be done for several blocks + i1 = i0 + n1 = n0 + 145 continue + tempq = z( 4*i0-3 ) + z( 4*i0-3 ) = z( 4*i0-3 ) + sigma + do k = i0+1, n0 + tempe = z( 4*k-5 ) + z( 4*k-5 ) = z( 4*k-5 ) * (tempq / z( 4*k-7 )) + tempq = z( 4*k-3 ) + z( 4*k-3 ) = z( 4*k-3 ) + sigma + tempe - z( 4*k-5 ) + end do + ! prepare to do this on the previous block if there is one + if( i1>1 ) then + n1 = i1-1 + do while( ( i1>=2 ) .and. ( z(4*i1-5)>=zero ) ) + i1 = i1 - 1 + end do + sigma = -z(4*n1-1) + go to 145 + end if + do k = 1, n + z( 2*k-1 ) = z( 4*k-3 ) + ! only the block 1..n0 is unfinished. the rest of the e's + ! must be essentially zero, although sometimes other data + ! has been stored in them. + if( k DLATRF_AA factorizes a panel of a real symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_dlasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), h(ldh,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + real(dp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_dsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j, j:m) - h(j:m, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:m, j) has been initialized to be a(j, j:m) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_dgemv( 'NO TRANSPOSE', mj, j-k1,-one, h( j, k1 ), ldh,a( 1, j ), 1,& + one, h( j, j ), 1 ) + end if + ! copy h(i:m, i) into work + call stdlib_dcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:m) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:m) stores u(j-1, j:m) + alpha = -a( k-1, j ) + call stdlib_daxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = work( 1 ) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_daxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_idamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:m) with a(i1+1:m, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_dswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + ! swap a(i1, i2+1:m) with a(i2, i2+1:m) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_dswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_dsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j:m, j) - h(j:m, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:m, j) has been initialized to be a(j:m, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_dgemv( 'NO TRANSPOSE', mj, j-k1,-one, h( j, k1 ), ldh,a( j, 1 ), lda,& + one, h( j, j ), 1 ) + end if + ! copy h(j:m, j) into work + call stdlib_dcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:m, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -a( j, k-1 ) + call stdlib_daxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = work( 1 ) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_daxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_idamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:m, i1) with a(i2, i1+1:m) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_dswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + ! swap a(i2+1:m, i1) with a(i2+1:m, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_dswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j DPTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using DPTTRF, and then calling DBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band symmetric positive definite matrix + !> can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to tridiagonal + !> form, however, may preclude the possibility of obtaining high + !> relative accuracy in the small eigenvalues of the original matrix, if + !> these eigenvalues range over many orders of magnitude.) + + pure subroutine stdlib_dpteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*), z(ldz,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Arrays + real(dp) :: c(1,1), vt(1,1) + ! Local Scalars + integer(ilp) :: i, icompz, nru + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldz0 )z( 1, 1 ) = one + return + end if + if( icompz==2 )call stdlib_dlaset( 'FULL', n, n, zero, one, z, ldz ) + ! call stdlib_dpttrf to factor the matrix. + call stdlib_dpttrf( n, d, e, info ) + if( info/=0 )return + do i = 1, n + d( i ) = sqrt( d( i ) ) + end do + do i = 1, n - 1 + e( i ) = e( i )*d( i ) + end do + ! call stdlib_dbdsqr to compute the singular values/vectors of the + ! bidiagonal factor. + if( icompz>0 ) then + nru = n + else + nru = 0 + end if + call stdlib_dbdsqr( 'LOWER', n, 0, nru, 0, d, e, vt, 1, z, ldz, c, 1,work, info ) + + ! square the singular values. + if( info==0 ) then + do i = 1, n + d( i ) = d( i )*d( i ) + end do + else + info = n + info + end if + return + end subroutine stdlib_dpteqr + + !> DSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> DSTEGR is a compatibility wrapper around the improved DSTEMR routine. + !> See DSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : DSTEGR and DSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_dstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*) + real(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_dstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_dstegr + + !> DSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.DSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + + pure subroutine stdlib_dstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*) + real(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: minrgp = 1.0e-3_dp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(dp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_dstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_dlarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_dlarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_dlarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_dlae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_dlaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_dlarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_dlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_dscal( n, scale, d, 1 ) + call stdlib_dscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_dlarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_dlarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_dcopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_dlarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_dlarre computes the eigenvalues to less than full precision. + ! stdlib_dlarrv will refine the eigenvalue approximations, and we can + ! need less accurate initial bisection in stdlib_dlarre. + ! note: these settings do only affect the subset case and stdlib_dlarre + rtol1 = sqrt(eps) + rtol2 = max( sqrt(eps)*5.0e-3_dp, four * eps ) + endif + call stdlib_dlarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_dlarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_dlarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_dlarre computes eigenvalues of the (shifted) root representation + ! stdlib_dlarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_dlarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_dlasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) DSTEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Eigenvalues and + !> eigenvectors can be selected by specifying either a range of values + !> or a range of indices for the desired eigenvalues. + !> Whenever possible, DSTEVR calls DSTEMR to compute the + !> eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. For the i-th + !> unreduced block of T, + !> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T + !> is a relatively robust representation, + !> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high + !> relative accuracy by the dqds algorithm, + !> (c) If there is a cluster of close eigenvalues, "choose" sigma_i + !> close to the cluster, and go to step (a), + !> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, + !> compute the corresponding eigenvector by forming a + !> rank-revealing twisted factorization. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, + !> Computer Science Division Technical Report No. UCB//CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + pure subroutine stdlib_dstevr( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, lquery, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, imax, indibl, indifl, indisp, indiwo, iscale, itmp1, j, jj, & + liwmin, lwmin, nsplit + real(dp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, tnrm, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'DSTEVR', 'N', 1, 2, 3, 4 ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 20*n ) + liwmin = max( 1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + if( valeig ) then + vll = vl + vuu = vu + end if + tnrm = stdlib_dlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_dscal( n, sigma, d, 1 ) + call stdlib_dscal( n-1, sigma, e( 1 ), 1 ) + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: these indices are used only + ! if stdlib_dsterf or stdlib_dstemr fail. + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_dstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_dstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_dstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indisp + n + ! if all eigenvalues are desired, then + ! call stdlib_dsterf or stdlib_dstemr. if this fails for some eigenvalue, then + ! try stdlib_dstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ieeeok==1 ) then + call stdlib_dcopy( n-1, e( 1 ), 1, work( 1 ), 1 ) + if( .not.wantz ) then + call stdlib_dcopy( n, d, 1, w, 1 ) + call stdlib_dsterf( n, w, work, info ) + else + call stdlib_dcopy( n, d, 1, work( n+1 ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_dstemr( jobz, 'A', n, work( n+1 ), work, vl, vu, il,iu, m, w, z, ldz,& + n, isuppz, tryrac,work( 2*n+1 ), lwork-2*n, iwork, liwork, info ) + end if + if( info==0 ) then + m = n + go to 10 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_dstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstol, d, e, m,nsplit, w, & + iwork( indibl ), iwork( indisp ), work,iwork( indiwo ), info ) + if( wantz ) then + call stdlib_dstein( n, d, e, m, w, iwork( indibl ), iwork( indisp ),z, ldz, work, & + iwork( indiwo ), iwork( indifl ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 10 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> DSYEVR first reduces the matrix A to tridiagonal form T with a call + !> to DSYTRD. Then, whenever possible, DSYEVR calls DSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see DSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> DSYEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_dsyevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indd, inddd, inde, indee, indibl, indifl, & + indisp, indiwo, indtau, indwk, indwkn, iscale, j, jj, liwmin, llwork, llwrkn, lwkopt, & + lwmin, nb, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 26*n ) + liwmin = max( 1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_dlansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_dscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_dscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_dsterf or stdlib_dstemr fail. + ! work(indtau:indtau+n-1) stores the scalar factors of the + ! elementary reflectors used in stdlib_dsytrd. + indtau = 1 + ! work(indd:indd+n-1) stores the tridiagonal's diagonal entries. + indd = indtau + n + ! work(inde:inde+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_dsytrd. + inde = indd + n + ! work(inddd:inddd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_dstemr (the stdlib_dsterf path copies the diagonal to w). + inddd = inde + n + ! work(indee:indee+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_dsterf and stdlib_dstemr. + indee = inddd + n + ! indwk is the starting offset of the left-over workspace, and + ! llwork is the remaining workspace size. + indwk = indee + n + llwork = lwork - indwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_dstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_dstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_dstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_dsytrd to reduce symmetric matrix to tridiagonal form. + call stdlib_dsytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_dsterf or stdlib_dstemr and stdlib_dormtr. + if( ( alleig .or. ( indeig .and. il==1 .and. iu==n ) ) .and.ieeeok==1 ) then + if( .not.wantz ) then + call stdlib_dcopy( n, work( indd ), 1, w, 1 ) + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dsterf( n, w, work( indee ), info ) + else + call stdlib_dcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_dcopy( n, work( indd ), 1, work( inddd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_dstemr( jobz, 'A', n, work( inddd ), work( indee ),vl, vu, il, iu, m,& + w, z, ldz, n, isuppz,tryrac, work( indwk ), lwork, iwork, liwork,info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_dstemr. + if( wantz .and. info==0 ) then + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_dormtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + ! everything worked. skip stdlib_dstebz/stdlib_dstein. iwork(:) are + ! undefined. + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_dstein. + ! also call stdlib_dstebz and stdlib_dstein if stdlib_dstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwk ),iwork( indiwo ), info ) + + if( wantz ) then + call stdlib_dstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_dstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_dormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + ! jump here if stdlib_dstemr/stdlib_dstein succeeded. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. note: we do not sort the ifail portion of iwork. + ! it may not be initialized (if stdlib_dstemr/stdlib_dstein succeeded), and we do + ! not return this detailed information to the user. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYSV computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_dsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*), b(ldb,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYTRF_AA: computes the factorization of a real symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_dsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + real(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'DSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_dlasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_dswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = one + call stdlib_dcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_dgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_dgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j1-k2, j3 ), 1,one, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_dgemm + call stdlib_dgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-one, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,one, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_dcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_dcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_dlasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_dlasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_dswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = one + call stdlib_dcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_dgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_dgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j3, j1-k2 ), lda,one, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_dgemm + call stdlib_dgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-one, work(& + j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,one, a( j3, j2 ), lda ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_dcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_dsytrf_aa + + + +end module stdlib_linalg_lapack_d diff --git a/src/stdlib_linalg_lapack_q.fypp b/src/stdlib_linalg_lapack_q.fypp new file mode 100644 index 000000000..66a9ecd67 --- /dev/null +++ b/src/stdlib_linalg_lapack_q.fypp @@ -0,0 +1,85277 @@ +#:include "common.fypp" +#:if WITH_QP +module stdlib_linalg_lapack_q + use stdlib_linalg_constants + use stdlib_linalg_blas + use stdlib_linalg_lapack_aux + use stdlib_linalg_lapack_s + use stdlib_linalg_lapack_c + use stdlib_linalg_lapack_d + use stdlib_linalg_lapack_z + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_qbbcsd + public :: stdlib_qbdsdc + public :: stdlib_qbdsqr + public :: stdlib_qdisna + public :: stdlib_qgbbrd + public :: stdlib_qgbcon + public :: stdlib_qgbequ + public :: stdlib_qgbequb + public :: stdlib_qgbrfs + public :: stdlib_qgbsv + public :: stdlib_qgbsvx + public :: stdlib_qgbtf2 + public :: stdlib_qgbtrf + public :: stdlib_qgbtrs + public :: stdlib_qgebak + public :: stdlib_qgebal + public :: stdlib_qgebd2 + public :: stdlib_qgebrd + public :: stdlib_qgecon + public :: stdlib_qgeequ + public :: stdlib_qgeequb + public :: stdlib_qgees + public :: stdlib_qgeesx + public :: stdlib_qgeev + public :: stdlib_qgeevx + public :: stdlib_qgehd2 + public :: stdlib_qgehrd + public :: stdlib_qgejsv + public :: stdlib_qgelq + public :: stdlib_qgelq2 + public :: stdlib_qgelqf + public :: stdlib_qgelqt + public :: stdlib_qgelqt3 + public :: stdlib_qgels + public :: stdlib_qgelsd + public :: stdlib_qgelss + public :: stdlib_qgelsy + public :: stdlib_qgemlq + public :: stdlib_qgemlqt + public :: stdlib_qgemqr + public :: stdlib_qgemqrt + public :: stdlib_qgeql2 + public :: stdlib_qgeqlf + public :: stdlib_qgeqp3 + public :: stdlib_qgeqr + public :: stdlib_qgeqr2 + public :: stdlib_qgeqr2p + public :: stdlib_qgeqrf + public :: stdlib_qgeqrfp + public :: stdlib_qgeqrt + public :: stdlib_qgeqrt2 + public :: stdlib_qgeqrt3 + public :: stdlib_qgerfs + public :: stdlib_qgerq2 + public :: stdlib_qgerqf + public :: stdlib_qgesc2 + public :: stdlib_qgesdd + public :: stdlib_qgesv + public :: stdlib_qgesvd + public :: stdlib_qgesvdq + public :: stdlib_qgesvj + public :: stdlib_qgesvx + public :: stdlib_qgetc2 + public :: stdlib_qgetf2 + public :: stdlib_qgetrf + public :: stdlib_qgetrf2 + public :: stdlib_qgetri + public :: stdlib_qgetrs + public :: stdlib_qgetsls + public :: stdlib_qgetsqrhrt + public :: stdlib_qggbak + public :: stdlib_qggbal + public :: stdlib_qgges + public :: stdlib_qgges3 + public :: stdlib_qggesx + public :: stdlib_qggev + public :: stdlib_qggev3 + public :: stdlib_qggevx + public :: stdlib_qggglm + public :: stdlib_qgghd3 + public :: stdlib_qgghrd + public :: stdlib_qgglse + public :: stdlib_qggqrf + public :: stdlib_qggrqf + public :: stdlib_qgsvj0 + public :: stdlib_qgsvj1 + public :: stdlib_qgtcon + public :: stdlib_qgtrfs + public :: stdlib_qgtsv + public :: stdlib_qgtsvx + public :: stdlib_qgttrf + public :: stdlib_qgttrs + public :: stdlib_qgtts2 + public :: stdlib_qhgeqz + public :: stdlib_qhsein + public :: stdlib_qhseqr + public :: stdlib_qisnan + public :: stdlib_qla_gbamv + public :: stdlib_qla_gbrcond + public :: stdlib_qla_gbrpvgrw + public :: stdlib_qla_geamv + public :: stdlib_qla_gercond + public :: stdlib_qla_gerpvgrw + public :: stdlib_qla_lin_berr + public :: stdlib_qla_porcond + public :: stdlib_qla_porpvgrw + public :: stdlib_qla_syamv + public :: stdlib_qla_syrcond + public :: stdlib_qla_syrpvgrw + public :: stdlib_qla_wwaddw + public :: stdlib_qlabad + public :: stdlib_qlabrd + public :: stdlib_qlacn2 + public :: stdlib_qlacon + public :: stdlib_qlacpy + public :: stdlib_qladiv + public :: stdlib_qladiv1 + public :: stdlib_qladiv2 + public :: stdlib_qlae2 + public :: stdlib_qlaebz + public :: stdlib_qlaed0 + public :: stdlib_qlaed1 + public :: stdlib_qlaed2 + public :: stdlib_qlaed3 + public :: stdlib_qlaed4 + public :: stdlib_qlaed5 + public :: stdlib_qlaed6 + public :: stdlib_qlaed7 + public :: stdlib_qlaed8 + public :: stdlib_qlaed9 + public :: stdlib_qlaeda + public :: stdlib_qlaein + public :: stdlib_qlaev2 + public :: stdlib_qlaexc + public :: stdlib_qlag2 + public :: stdlib_qlag2s + public :: stdlib_qlags2 + public :: stdlib_qlagtf + public :: stdlib_qlagtm + public :: stdlib_qlagts + public :: stdlib_qlagv2 + public :: stdlib_qlahqr + public :: stdlib_qlahr2 + public :: stdlib_qlaic1 + public :: stdlib_qlaisnan + public :: stdlib_qlaln2 + public :: stdlib_qlals0 + public :: stdlib_qlalsa + public :: stdlib_qlalsd + public :: stdlib_qlamch + public :: stdlib_qlamc3 + public :: stdlib_qlamrg + public :: stdlib_qlamswlq + public :: stdlib_qlamtsqr + public :: stdlib_qlaneg + public :: stdlib_qlangb + public :: stdlib_qlange + public :: stdlib_qlangt + public :: stdlib_qlanhs + public :: stdlib_qlansb + public :: stdlib_qlansf + public :: stdlib_qlansp + public :: stdlib_qlanst + public :: stdlib_qlansy + public :: stdlib_qlantb + public :: stdlib_qlantp + public :: stdlib_qlantr + public :: stdlib_qlanv2 + public :: stdlib_qlaorhr_col_getrfnp + public :: stdlib_qlaorhr_col_getrfnp2 + public :: stdlib_qlapll + public :: stdlib_qlapmr + public :: stdlib_qlapmt + public :: stdlib_qlapy2 + public :: stdlib_qlapy3 + public :: stdlib_qlaqgb + public :: stdlib_qlaqge + public :: stdlib_qlaqp2 + public :: stdlib_qlaqps + public :: stdlib_qlaqr0 + public :: stdlib_qlaqr1 + public :: stdlib_qlaqr2 + public :: stdlib_qlaqr3 + public :: stdlib_qlaqr4 + public :: stdlib_qlaqr5 + public :: stdlib_qlaqsb + public :: stdlib_qlaqsp + public :: stdlib_qlaqsy + public :: stdlib_qlaqtr + public :: stdlib_qlaqz0 + public :: stdlib_qlaqz1 + public :: stdlib_qlaqz2 + public :: stdlib_qlaqz3 + public :: stdlib_qlaqz4 + public :: stdlib_qlar1v + public :: stdlib_qlar2v + public :: stdlib_qlarf + public :: stdlib_qlarfb + public :: stdlib_qlarfb_gett + public :: stdlib_qlarfg + public :: stdlib_qlarfgp + public :: stdlib_qlarft + public :: stdlib_qlarfx + public :: stdlib_qlarfy + public :: stdlib_qlargv + public :: stdlib_qlarnv + public :: stdlib_qlarra + public :: stdlib_qlarrb + public :: stdlib_qlarrc + public :: stdlib_qlarrd + public :: stdlib_qlarre + public :: stdlib_qlarrf + public :: stdlib_qlarrj + public :: stdlib_qlarrk + public :: stdlib_qlarrr + public :: stdlib_qlarrv + public :: stdlib_qlartg + public :: stdlib_qlartgp + public :: stdlib_qlartgs + public :: stdlib_qlartv + public :: stdlib_qlaruv + public :: stdlib_qlarz + public :: stdlib_qlarzb + public :: stdlib_qlarzt + public :: stdlib_qlas2 + public :: stdlib_qlascl + public :: stdlib_qlasd0 + public :: stdlib_qlasd1 + public :: stdlib_qlasd2 + public :: stdlib_qlasd3 + public :: stdlib_qlasd4 + public :: stdlib_qlasd5 + public :: stdlib_qlasd6 + public :: stdlib_qlasd7 + public :: stdlib_qlasd8 + public :: stdlib_qlasda + public :: stdlib_qlasdq + public :: stdlib_qlasdt + public :: stdlib_qlaset + public :: stdlib_qlasq1 + public :: stdlib_qlasq2 + public :: stdlib_qlasq3 + public :: stdlib_qlasq4 + public :: stdlib_qlasq5 + public :: stdlib_qlasq6 + public :: stdlib_qlasr + public :: stdlib_qlasrt + public :: stdlib_qlassq + public :: stdlib_qlasv2 + public :: stdlib_qlaswlq + public :: stdlib_qlaswp + public :: stdlib_qlasy2 + public :: stdlib_qlasyf + public :: stdlib_qlasyf_aa + public :: stdlib_qlasyf_rk + public :: stdlib_qlasyf_rook + public :: stdlib_qlat2s + public :: stdlib_qlatbs + public :: stdlib_qlatdf + public :: stdlib_qlatps + public :: stdlib_qlatrd + public :: stdlib_qlatrs + public :: stdlib_qlatrz + public :: stdlib_qlatsqr + public :: stdlib_qlauu2 + public :: stdlib_qlauum + public :: stdlib_qopgtr + public :: stdlib_qopmtr + public :: stdlib_qorbdb + public :: stdlib_qorbdb1 + public :: stdlib_qorbdb2 + public :: stdlib_qorbdb3 + public :: stdlib_qorbdb4 + public :: stdlib_qorbdb5 + public :: stdlib_qorbdb6 + public :: stdlib_qorcsd + public :: stdlib_qorcsd2by1 + public :: stdlib_qorg2l + public :: stdlib_qorg2r + public :: stdlib_qorgbr + public :: stdlib_qorghr + public :: stdlib_qorgl2 + public :: stdlib_qorglq + public :: stdlib_qorgql + public :: stdlib_qorgqr + public :: stdlib_qorgr2 + public :: stdlib_qorgrq + public :: stdlib_qorgtr + public :: stdlib_qorgtsqr + public :: stdlib_qorgtsqr_row + public :: stdlib_qorhr_col + public :: stdlib_qorm22 + public :: stdlib_qorm2l + public :: stdlib_qorm2r + public :: stdlib_qormbr + public :: stdlib_qormhr + public :: stdlib_qorml2 + public :: stdlib_qormlq + public :: stdlib_qormql + public :: stdlib_qormqr + public :: stdlib_qormr2 + public :: stdlib_qormr3 + public :: stdlib_qormrq + public :: stdlib_qormrz + public :: stdlib_qormtr + public :: stdlib_qpbcon + public :: stdlib_qpbequ + public :: stdlib_qpbrfs + public :: stdlib_qpbstf + public :: stdlib_qpbsv + public :: stdlib_qpbsvx + public :: stdlib_qpbtf2 + public :: stdlib_qpbtrf + public :: stdlib_qpbtrs + public :: stdlib_qpftrf + public :: stdlib_qpftri + public :: stdlib_qpftrs + public :: stdlib_qpocon + public :: stdlib_qpoequ + public :: stdlib_qpoequb + public :: stdlib_qporfs + public :: stdlib_qposv + public :: stdlib_qposvx + public :: stdlib_qpotf2 + public :: stdlib_qpotrf + public :: stdlib_qpotrf2 + public :: stdlib_qpotri + public :: stdlib_qpotrs + public :: stdlib_qppcon + public :: stdlib_qppequ + public :: stdlib_qpprfs + public :: stdlib_qppsv + public :: stdlib_qppsvx + public :: stdlib_qpptrf + public :: stdlib_qpptri + public :: stdlib_qpptrs + public :: stdlib_qpstf2 + public :: stdlib_qpstrf + public :: stdlib_qptcon + public :: stdlib_qpteqr + public :: stdlib_qptrfs + public :: stdlib_qptsv + public :: stdlib_qptsvx + public :: stdlib_qpttrf + public :: stdlib_qpttrs + public :: stdlib_qptts2 + public :: stdlib_qrscl + public :: stdlib_qsb2st_kernels + public :: stdlib_qsbev + public :: stdlib_qsbevd + public :: stdlib_qsbevx + public :: stdlib_qsbgst + public :: stdlib_qsbgv + public :: stdlib_qsbgvd + public :: stdlib_qsbgvx + public :: stdlib_qsbtrd + public :: stdlib_qsfrk + public :: stdlib_qsgesv + public :: stdlib_qspcon + public :: stdlib_qspev + public :: stdlib_qspevd + public :: stdlib_qspevx + public :: stdlib_qspgst + public :: stdlib_qspgv + public :: stdlib_qspgvd + public :: stdlib_qspgvx + public :: stdlib_qsposv + public :: stdlib_qsprfs + public :: stdlib_qspsv + public :: stdlib_qspsvx + public :: stdlib_qsptrd + public :: stdlib_qsptrf + public :: stdlib_qsptri + public :: stdlib_qsptrs + public :: stdlib_qstebz + public :: stdlib_qstedc + public :: stdlib_qstegr + public :: stdlib_qstein + public :: stdlib_qstemr + public :: stdlib_qsteqr + public :: stdlib_qsterf + public :: stdlib_qstev + public :: stdlib_qstevd + public :: stdlib_qstevr + public :: stdlib_qstevx + public :: stdlib_qsycon + public :: stdlib_qsycon_rook + public :: stdlib_qsyconv + public :: stdlib_qsyconvf + public :: stdlib_qsyconvf_rook + public :: stdlib_qsyequb + public :: stdlib_qsyev + public :: stdlib_qsyevd + public :: stdlib_qsyevr + public :: stdlib_qsyevx + public :: stdlib_qsygs2 + public :: stdlib_qsygst + public :: stdlib_qsygv + public :: stdlib_qsygvd + public :: stdlib_qsygvx + public :: stdlib_qsyrfs + public :: stdlib_qsysv + public :: stdlib_qsysv_aa + public :: stdlib_qsysv_rk + public :: stdlib_qsysv_rook + public :: stdlib_qsysvx + public :: stdlib_qsyswapr + public :: stdlib_qsytd2 + public :: stdlib_qsytf2 + public :: stdlib_qsytf2_rk + public :: stdlib_qsytf2_rook + public :: stdlib_qsytrd + public :: stdlib_qsytrd_sb2st + public :: stdlib_qsytrd_sy2sb + public :: stdlib_qsytrf + public :: stdlib_qsytrf_aa + public :: stdlib_qsytrf_rk + public :: stdlib_qsytrf_rook + public :: stdlib_qsytri + public :: stdlib_qsytri_rook + public :: stdlib_qsytrs + public :: stdlib_qsytrs2 + public :: stdlib_qsytrs_3 + public :: stdlib_qsytrs_aa + public :: stdlib_qsytrs_rook + public :: stdlib_qtbcon + public :: stdlib_qtbrfs + public :: stdlib_qtbtrs + public :: stdlib_qtfsm + public :: stdlib_qtftri + public :: stdlib_qtfttp + public :: stdlib_qtfttr + public :: stdlib_qtgevc + public :: stdlib_qtgex2 + public :: stdlib_qtgexc + public :: stdlib_qtgsen + public :: stdlib_qtgsja + public :: stdlib_qtgsna + public :: stdlib_qtgsy2 + public :: stdlib_qtgsyl + public :: stdlib_qtpcon + public :: stdlib_qtplqt + public :: stdlib_qtplqt2 + public :: stdlib_qtpmlqt + public :: stdlib_qtpmqrt + public :: stdlib_qtpqrt + public :: stdlib_qtpqrt2 + public :: stdlib_qtprfb + public :: stdlib_qtprfs + public :: stdlib_qtptri + public :: stdlib_qtptrs + public :: stdlib_qtpttf + public :: stdlib_qtpttr + public :: stdlib_qtrcon + public :: stdlib_qtrevc + public :: stdlib_qtrevc3 + public :: stdlib_qtrexc + public :: stdlib_qtrrfs + public :: stdlib_qtrsen + public :: stdlib_qtrsna + public :: stdlib_qtrsyl + public :: stdlib_qtrti2 + public :: stdlib_qtrtri + public :: stdlib_qtrtrs + public :: stdlib_qtrttf + public :: stdlib_qtrttp + public :: stdlib_qtzrzf + public :: stdlib_qzsum1 + public :: stdlib_qlag2q + + ! 128-bit real constants + real(qp), parameter, private :: negone = -1.00_qp + real(qp), parameter, private :: zero = 0.00_qp + real(qp), parameter, private :: half = 0.50_qp + real(qp), parameter, private :: one = 1.00_qp + real(qp), parameter, private :: two = 2.00_qp + real(qp), parameter, private :: three = 3.00_qp + real(qp), parameter, private :: four = 4.00_qp + real(qp), parameter, private :: eight = 8.00_qp + real(qp), parameter, private :: ten = 10.00_qp + + ! 128-bit complex constants + complex(qp), parameter, private :: czero = ( 0.0_qp,0.0_qp) + complex(qp), parameter, private :: chalf = ( 0.5_qp,0.0_qp) + complex(qp), parameter, private :: cone = ( 1.0_qp,0.0_qp) + complex(qp), parameter, private :: cnegone = (-1.0_qp,0.0_qp) + + ! 128-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(qp), parameter, private :: rradix = real(radix(zero),qp) + real(qp), parameter, private :: ulp = epsilon(zero) + real(qp), parameter, private :: eps = ulp*half + real(qp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(qp), parameter, private :: safmax = one/safmin + real(qp), parameter, private :: smlnum = safmin/ulp + real(qp), parameter, private :: bignum = safmax*ulp + real(qp), parameter, private :: rtmin = sqrt(smlnum) + real(qp), parameter, private :: rtmax = sqrt(bignum) + + ! 128-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(qp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(qp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(qp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(qp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> DBBCSD: computes the CS decomposition of an orthogonal matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See DORCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The orthogonal matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_qbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q + ! Array Arguments + real(qp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), work(*) + real(qp), intent(inout) :: phi(*), theta(*) + real(qp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(qp), parameter :: hundred = 100.0_qp + real(qp), parameter :: meighth = -0.125_qp + real(qp), parameter :: piover2 = 1.57079632679489661923132169163975144210_qp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini + real(qp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lworkmin = 1 + work(1) = lworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lworkopt = iv2tsn + q - 1 + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'DBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_qlamch( 'EPSILON' ) + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_qlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_qlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_qp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_qlartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + else + call stdlib_qlartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + end if + temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) + b11d(imin) = temp + b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) + temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) + b21d(imin) = temp + b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_qlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& + r ) + else if( mu <= nu ) then + call stdlib_qlartgs( b11e( imin ), b11d( imin + 1 ), mu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + else + call stdlib_qlartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_qlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& + r ) + else if( nu < mu ) then + call stdlib_qlartgs( b21e( imin ), b21d( imin + 1 ), nu,work(iu2cs+imin-1), work(& + iu2sn+imin-1) ) + else + call stdlib_qlartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& + imin-1) ) + end if + work(iu2cs+imin-1) = -work(iu2cs+imin-1) + work(iu2sn+imin-1) = -work(iu2sn+imin-1) + temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = work(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) + end if + temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) + b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = work(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) + temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = work(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) + end if + temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) + b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = work(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_qlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_qlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_qlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_qlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + else + call stdlib_qlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + end if + work(iv1tcs+i-1) = -work(iv1tcs+i-1) + work(iv1tsn+i-1) = -work(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_qlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( nu < mu ) then + call stdlib_qlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + else + call stdlib_qlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + end if + temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) + b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = work(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) + temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) + b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = work(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) + temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) + b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = work(iv2tsn+i-1-1)*b12e(i) + b12e(i) = work(iv2tcs+i-1-1)*b12e(i) + temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) + b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = work(iv2tsn+i-1-1)*b22e(i) + b22e(i) = work(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_qlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_qlartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_qlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( mu <= nu ) then + call stdlib_qlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + else + call stdlib_qlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_qlartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( nu < mu ) then + call stdlib_qlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + else + call stdlib_qlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + end if + work(iu2cs+i-1) = -work(iu2cs+i-1) + work(iu2sn+i-1) = -work(iu2sn+i-1) + temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = work(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) + end if + temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = work(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) + end if + temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) + b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = work(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) + temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) + b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = work(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_qlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_qlartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + else + call stdlib_qlartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + end if + temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) + b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) + b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_qlasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_qlasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_qlasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_qlasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_qlasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_qlasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_qlasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_qlasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_qscal( q, negone, v1t(imax,1), ldv1t ) + else + call stdlib_qscal( q, negone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_qscal( p, negone, u1(1,imax), 1 ) + else + call stdlib_qscal( p, negone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_qscal( m-p, negone, u2(1,imax), 1 ) + else + call stdlib_qscal( m-p, negone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_qscal( m-q, negone, v2t(imax,1), ldv2t ) + else + call stdlib_qscal( m-q, negone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_qswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_qswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_qswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_qswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_qswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_qswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_qswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_qswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_qbbcsd + + !> DBDSDC: computes the singular value decomposition (SVD) of a real + !> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + !> using a divide and conquer method, where S is a diagonal matrix + !> with non-negative diagonal elements (the singular values of B), and + !> U and VT are orthogonal matrices of left and right singular vectors, + !> respectively. DBDSDC can be used to compute all singular values, + !> and optionally, singular vectors or singular vectors in compact form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLASD3 for details. + !> The code currently calls DLASDQ if singular values only are desired. + !> However, it can be slightly modified to compute singular values + !> using the divide and conquer method. + + pure subroutine stdlib_qbdsdc( uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n + ! Array Arguments + integer(ilp), intent(out) :: iq(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: q(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! changed dimension statement in comment describing e from (n) to + ! (n-1). sven, 17 feb 05. + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: difl, difr, givcol, givnum, givptr, i, ic, icompq, ierr, ii, is, iu, & + iuplo, ivt, j, k, kk, mlvl, nm1, nsize, perm, poles, qstart, smlsiz, smlszp, sqre, & + start, wstart, z + real(qp) :: cs, eps, orgnrm, p, r, sn + ! Intrinsic Functions + intrinsic :: abs,real,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( stdlib_lsame( compq, 'N' ) ) then + icompq = 0 + else if( stdlib_lsame( compq, 'P' ) ) then + icompq = 1 + else if( stdlib_lsame( compq, 'I' ) ) then + icompq = 2 + else + icompq = -1 + end if + if( iuplo==0 ) then + info = -1 + else if( icompq<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( ldu<1 ) .or. ( ( icompq==2 ) .and. ( ldu=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - start + 1 + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n). solve this 1-by-1 problem + ! first. + nsize = i - start + 1 + if( icompq==2 ) then + u( n, n ) = sign( one, d( n ) ) + vt( n, n ) = one + else if( icompq==1 ) then + q( n+( qstart-1 )*n ) = sign( one, d( n ) ) + q( n+( smlsiz+qstart-1 )*n ) = one + end if + d( n ) = abs( d( n ) ) + end if + if( icompq==2 ) then + call stdlib_qlasd0( nsize, sqre, d( start ), e( start ),u( start, start ), & + ldu, vt( start, start ),ldvt, smlsiz, iwork, work( wstart ), info ) + else + call stdlib_qlasda( icompq, smlsiz, nsize, sqre, d( start ),e( start ), q( & + start+( iu+qstart-2 )*n ), n,q( start+( ivt+qstart-2 )*n ),iq( start+k*n ), q(& + start+( difl+qstart-2 )*n ), q( start+( difr+qstart-2 )*n ),q( start+( z+& + qstart-2 )*n ),q( start+( poles+qstart-2 )*n ),iq( start+givptr*n ), iq( & + start+givcol*n ),n, iq( start+perm*n ),q( start+( givnum+qstart-2 )*n ),q( & + start+( ic+qstart-2 )*n ),q( start+( is+qstart-2 )*n ),work( wstart ), iwork,& + info ) + end if + if( info/=0 ) then + return + end if + start = i + 1 + end if + end do loop_30 + ! unscale + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, ierr ) + 40 continue + ! use selection sort to minimize swaps of singular vectors + do ii = 2, n + i = ii - 1 + kk = i + p = d( i ) + do j = ii, n + if( d( j )>p ) then + kk = j + p = d( j ) + end if + end do + if( kk/=i ) then + d( kk ) = d( i ) + d( i ) = p + if( icompq==1 ) then + iq( i ) = kk + else if( icompq==2 ) then + call stdlib_qswap( n, u( 1, i ), 1, u( 1, kk ), 1 ) + call stdlib_qswap( n, vt( i, 1 ), ldvt, vt( kk, 1 ), ldvt ) + end if + else if( icompq==1 ) then + iq( i ) = i + end if + end do + ! if icompq = 1, use iq(n,1) as the indicator for uplo + if( icompq==1 ) then + if( iuplo==1 ) then + iq( n ) = 1 + else + iq( n ) = 0 + end if + end if + ! if b is lower bidiagonal, update u by those givens rotations + ! which rotated b to be upper bidiagonal + if( ( iuplo==2 ) .and. ( icompq==2 ) )call stdlib_qlasr( 'L', 'V', 'B', n, n, work( 1 )& + , work( n ), u, ldu ) + return + end subroutine stdlib_qbdsdc + + !> DBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**T + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**T*VT instead of + !> P**T, for given real input matrices U and VT. When U and VT are the + !> orthogonal matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by DGEBRD, then + !> A = (U*Q) * S * (P**T*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**T*C + !> for a given real input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_qbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, work, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: hndrth = 0.01_qp + real(qp), parameter :: hndrd = 100.0_qp + real(qp), parameter :: meigth = -0.125_qp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, iterdivn, j, ll, lll, m, maxitdivn, nm1, nm12, & + nm13, oldll, oldm + real(qp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_qlasq1( n, d, e, work, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_qlamch( 'EPSILON' ) + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + work( i ) = cs + work( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_qlasr( 'R', 'V', 'F', nru, n, work( 1 ), work( n ), u,ldu ) + + if( ncc>0 )call stdlib_qlasr( 'L', 'V', 'F', n, ncc, work( 1 ), work( n ), c,ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=qp) ) + thresh = max( tol*sminoa, maxitr*(n*(n*unfl)) ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*(n*(n*unfl)) ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxitdivn = maxitr*n + iterdivn = 0 + iter = -1 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>=n ) then + iter = iter - n + iterdivn = iterdivn + 1 + if( iterdivn>=maxitdivn )go to 200 + end if + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_qrot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt, cosr,sinr & + ) + if( nru>0 )call stdlib_qrot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + if( ncc>0 )call stdlib_qrot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_qlas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_qlas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_qlartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + work( i-ll+1 ) = cs + work( i-ll+1+nm1 ) = sn + work( i-ll+1+nm12 ) = oldcs + work( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_qlasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_qlasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_qlasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_qlartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_qlasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_qlasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_qlasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_qlartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_qlartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_qlasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_qlasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_qlasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_qlartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + work( i-ll ) = cosr + work( i-ll+nm1 ) = -sinr + work( i-ll+nm12 ) = cosl + work( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_qlasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_qlasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_qlasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_qscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_qswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_qswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_qswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_qbdsqr + + !> DDISNA: computes the reciprocal condition numbers for the eigenvectors + !> of a real symmetric or complex Hermitian matrix or for the left or + !> right singular vectors of a general m-by-n matrix. The reciprocal + !> condition number is the 'gap' between the corresponding eigenvalue or + !> singular value and the nearest other one. + !> The bound on the error, measured by angle in radians, in the I-th + !> computed vector is given by + !> DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) + !> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed + !> to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of + !> the error bound. + !> DDISNA may also be used to compute error bounds for eigenvectors of + !> the generalized symmetric definite eigenproblem. + + pure subroutine stdlib_qdisna( job, m, n, d, sep, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: m, n + ! Array Arguments + real(qp), intent(in) :: d(*) + real(qp), intent(out) :: sep(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: decr, eigen, incr, left, right, sing + integer(ilp) :: i, k + real(qp) :: anorm, eps, newgap, oldgap, safmin, thresh + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + eigen = stdlib_lsame( job, 'E' ) + left = stdlib_lsame( job, 'L' ) + right = stdlib_lsame( job, 'R' ) + sing = left .or. right + if( eigen ) then + k = m + else if( sing ) then + k = min( m, n ) + end if + if( .not.eigen .and. .not.sing ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( k<0 ) then + info = -3 + else + incr = .true. + decr = .true. + do i = 1, k - 1 + if( incr )incr = incr .and. d( i )<=d( i+1 ) + if( decr )decr = decr .and. d( i )>=d( i+1 ) + end do + if( sing .and. k>0 ) then + if( incr )incr = incr .and. zero<=d( 1 ) + if( decr )decr = decr .and. d( k )>=zero + end if + if( .not.( incr .or. decr ) )info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DDISNA', -info ) + return + end if + ! quick return if possible + if( k==0 )return + ! compute reciprocal condition numbers + if( k==1 ) then + sep( 1 ) = stdlib_qlamch( 'O' ) + else + oldgap = abs( d( 2 )-d( 1 ) ) + sep( 1 ) = oldgap + do i = 2, k - 1 + newgap = abs( d( i+1 )-d( i ) ) + sep( i ) = min( oldgap, newgap ) + oldgap = newgap + end do + sep( k ) = oldgap + end if + if( sing ) then + if( ( left .and. m>n ) .or. ( right .and. m DGBBRD: reduces a real general m-by-n band matrix A to upper + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> The routine computes B, and optionally forms Q or P**T, or computes + !> Q**T*C for a given matrix C. + + pure subroutine stdlib_qgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(qp), intent(inout) :: ab(ldab,*), c(ldc,*) + real(qp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& + mu, mu0, nr, nrt + real(qp) :: ra, rb, rc, rs + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the sines of the plane rotations are stored in work(1:max(m,n)) + ! and the cosines in work(max(m,n)+1:2*max(m,n)). + mn = max( m, n ) + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_qlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + work( mn+j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_qlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_qlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & + work( i+ml-1 ),ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = work( mn+j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_qlargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + work( mn+j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_qlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_qlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & + mn+i+mu-1 ), work( i+mu-1 ),ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_qrot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,work( mn+i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**t + do j = j1, j2, kb1 + call stdlib_qrot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, work( & + mn+j+kun ),work( j+kun ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, storing diagonal elements in d + ! and off-diagonal elements in e + do i = 1, min( m-1, n ) + call stdlib_qlartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + d( i ) = ra + if( i0 ) then + ! a has been reduced to upper bidiagonal form + if( m1 ) then + rb = -rs*ab( ku, i ) + e( i-1 ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_qrot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, rs ) + + end do + else + ! copy off-diagonal elements to e and diagonal elements to d + do i = 1, minmn - 1 + e( i ) = ab( ku, i+1 ) + end do + do i = 1, minmn + d( i ) = ab( ku+1, i ) + end do + end if + else + ! a is diagonal. set elements of e to zero and copy diagonal + ! elements to d. + do i = 1, minmn - 1 + e( i ) = zero + end do + do i = 1, minmn + d( i ) = ab( 1, i ) + end do + end if + return + end subroutine stdlib_qgbbrd + + !> DGBCON: estimates the reciprocal of the condition number of a real + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by DGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_qgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(qp) :: ainvnm, scale, smlnum, t + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,min + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_qaxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_qlatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, work( 2*n+1 ),info ) + else + ! multiply by inv(u**t). + call stdlib_qlatbs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, ldab, & + work, scale, work( 2*n+1 ),info ) + ! multiply by inv(l**t). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_qdot( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_iqamax( n, work, 1 ) + if( scale DGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_qgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab DGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from DGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_qgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), abs( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_qgbequb + + !> DGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_qgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_qgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, info ) + + call stdlib_qaxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + call stdlib_qgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_qgbrfs + + !> DGBSV: computes the solution to a real system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_qgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb DGBSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_qgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*), c(*), r(*) + real(qp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(qp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_qlantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_qlangb( norm, n, kl, ku, ab, ldab, work ) + rpvgrw = stdlib_qlantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_qlangb( 'M', n, kl, ku, ab, ldab, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_qgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, iwork, info ) + + ! compute the solution matrix x. + call stdlib_qlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_qgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_qgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DGBTF2: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_qgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_qscal( km, one / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_qger( km, ju-j, -one, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_qgbtf2 + + !> DGBTRF: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_qgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + real(qp) :: temp + ! Local Arrays + real(qp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_qgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! zero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = zero + end do + end do + ! zero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = zero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to zero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = zero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to zero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = zero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_iqamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=zero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_qger( km, jm-jj, -one, ab( kv+2, jj ), 1,ab( kv, jj+& + 1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_qcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_qlaswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_qlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_qtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, one, ab(& + kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -one, ab( & + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+1, j+jb ), & + ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -one, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+kl+1-jb, j+jb ), & + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_qtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, one, ab(& + kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -one, ab( & + kv+1+jb, j ), ldab-1,work13, ldwork, one, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -one, & + work31, ldwork, work13,ldwork, one, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_qcopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_qgbtrf + + !> DGBTRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general band matrix A using the LU factorization computed + !> by DGBTRF. + + pure subroutine stdlib_qgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-one modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_qswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_qger( lm, nrhs, -one, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, 1 )& + , ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_qtbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else + ! solve a**t*x = b. + do i = 1, nrhs + ! solve u**t*x = b, overwriting b with x. + call stdlib_qtbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t*x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_qgemv( 'TRANSPOSE', lm, nrhs, -one, b( j+1, 1 ),ldb, ab( kd+1, j )& + , 1, one, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_qswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_qgbtrs + + !> DGEBAK: forms the right or left eigenvectors of a real general matrix + !> by backward transformation on the computed eigenvectors of the + !> balanced matrix output by DGEBAL. + + pure subroutine stdlib_qgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(in) :: scale(*) + real(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(qp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i DGEBAL: balances a general real matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_qgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: scale(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sclfac = 2.0e+0_qp + real(qp), parameter :: factor = 0.95e+0_qp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(qp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + if( stdlib_qisnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'DGEBAL', -info ) + return + end if + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_qscal( n-k+1, g, a( i, k ), lda ) + call stdlib_qscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_qgebal + + !> DGEBD2: reduces a real general m by n matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_qgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + call stdlib_qlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + a( i, i ) = one + ! apply h(i) to a(i:m,i+1:n) from the left + if( i DGEBRD: reduces a general real M-by-N matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_qgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'DGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=qp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+nb-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_qlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update + ! of the form a := a - v*y**t - x*u**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& + nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_qgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_qgebrd + + !> DGECON: estimates the reciprocal of the condition number of a general + !> real matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by DGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_qgecon( norm, n, a, lda, anorm, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, scale, sl, smlnum, su + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_qgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from DGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_qgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), abs( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_qgeequb + + !> DGEES: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A matrix is in real Schur form if it is upper quasi-triangular with + !> 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the + !> form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_qgees( jobvs, sort, select, n, a, lda, sdim, wr, wi,vs, ldvs, work, lwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_q) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantst, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, maxwrk, minwrk + real(qp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_qlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need n) + ibal = 1 + call stdlib_qgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_qgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_qlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (workspace: none needed) + call stdlib_qtrsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, s, sep, & + work( iwrk ), lwork-iwrk+1, idum, 1,icond ) + if( icond>0 )info = n + icond + end if + if( wantvs ) then + ! undo balancing + ! (workspace: need n) + call stdlib_qgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_qlascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_qcopy( n, a, lda+1, wr, 1 ) + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi,max( ilo-1, 1 ), & + ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_qswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_qswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_qswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + ! undo scaling for the imaginary part of the eigenvalues + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_qgees + + !> DGEESX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_qp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A real matrix is in real Schur form if it is upper quasi-triangular + !> with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in + !> the form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_qgeesx( jobvs, sort, select, sense, n, a, lda, sdim,wr, wi, vs, ldvs, & + rconde, rcondv, work, lwork,iwork, liwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, liwork, lwork, n + real(qp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_q) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantsb, wantse, wantsn, wantst, & + wantsv, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, liwrk, lwrk, maxwrk, minwrk + real(qp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_qlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (rworkspace: need n) + ibal = 1 + call stdlib_qgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (rworkspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_qgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_qlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (rworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (rworkspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (rworkspace: if sense is not 'n', need n+2*sdim*(n-sdim) + ! otherwise, need n ) + ! (iworkspace: if sense is 'v' or 'b', need sdim*(n-sdim) + ! otherwise, need 0 ) + call stdlib_qtrsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,iwork, liwork, icond ) + if( .not.wantsn )maxwrk = max( maxwrk, n+2*sdim*( n-sdim ) ) + if( icond==-15 ) then + ! not enough real workspace + info = -16 + else if( icond==-17 ) then + ! not enough integer workspace + info = -18 + else if( icond>0 ) then + ! stdlib_qtrsen failed to reorder or to restore standard schur form + info = icond + n + end if + end if + if( wantvs ) then + ! undo balancing + ! (rworkspace: need n) + call stdlib_qgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_qlascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_qcopy( n, a, lda+1, wr, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_qswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_qswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_qswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + if( wantsv .or. wantsb ) then + iwork( 1 ) = max( 1, sdim*( n-sdim ) ) + else + iwork( 1 ) = 1 + end if + return + end subroutine stdlib_qgeesx + + !> DGEEV: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_qgeev( jobvl, jobvr, n, a, lda, wr, wi, vl, ldvl, vr,ldvr, work, lwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: vl(ldvl,*), vr(ldvr,*), wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, itau, iwrk, k, lwork_trevc, maxwrk, & + minwrk, nout + real(qp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_qlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (workspace: need n) + ibal = 1 + call stdlib_qgebal( 'B', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = ibal + n + iwrk = itau + n + call stdlib_qgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_qlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_qlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_qlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'E', 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_qhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 4*n, prefer n + n + 2*n*nb) + call stdlib_qtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (workspace: need n) + call stdlib_qgebak( 'B', 'L', n, ilo, ihi, work( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_qnrm2( n, vl( 1, i ), 1 ) + call stdlib_qscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_qlapy2( stdlib_qnrm2( n, vl( 1, i ), 1 ),stdlib_qnrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_qscal( n, scl, vl( 1, i ), 1 ) + call stdlib_qscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_iqamax( n, work( iwrk ), 1 ) + call stdlib_qlartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_qrot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (workspace: need n) + call stdlib_qgebak( 'B', 'R', n, ilo, ihi, work( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_qnrm2( n, vr( 1, i ), 1 ) + call stdlib_qscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_qlapy2( stdlib_qnrm2( n, vr( 1, i ), 1 ),stdlib_qnrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_qscal( n, scl, vr( 1, i ), 1 ) + call stdlib_qscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_iqamax( n, work( iwrk ), 1 ) + call stdlib_qlartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_qrot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info>0 ) then + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_qgeev + + !> DGEEVX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_qp of the LAPACK + !> Users' Guide. + + subroutine stdlib_qgeevx( balanc, jobvl, jobvr, sense, n, a, lda, wr, wi,vl, ldvl, vr, ldvr, & + ilo, ihi, scale, abnrm,rconde, rcondv, work, lwork, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(qp), intent(out) :: abnrm + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: rconde(*), rcondv(*), scale(*), vl(ldvl,*), vr(ldvr,*), wi(*),& + work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(qp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ).or. & + stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) )then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_qlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_qgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_qlange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (workspace: need 2*n, prefer n+n*nb) + itau = 1 + iwrk = itau + n + call stdlib_qgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_qlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_qlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_qlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_qorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_qhseqr( job, 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_qhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 3*n, prefer n + 2*n*nb) + call stdlib_qtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + ! compute condition numbers if desired + ! (workspace: need n*n+6*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_qtrsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, iwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_qgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_qnrm2( n, vl( 1, i ), 1 ) + call stdlib_qscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_qlapy2( stdlib_qnrm2( n, vl( 1, i ), 1 ),stdlib_qnrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_qscal( n, scl, vl( 1, i ), 1 ) + call stdlib_qscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_iqamax( n, work, 1 ) + call stdlib_qlartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_qrot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_qgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_qnrm2( n, vr( 1, i ), 1 ) + call stdlib_qscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_qlapy2( stdlib_qnrm2( n, vr( 1, i ), 1 ),stdlib_qnrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_qscal( n, scl, vr( 1, i ), 1 ) + call stdlib_qscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_iqamax( n, work, 1 ) + call stdlib_qlartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_qrot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_qlascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_qgeevx + + !> DGEHD2: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_qgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda DGEHRD: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_qgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + real(qp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin + tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**t + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_qlahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**t. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = one + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE',ihi, ihi-i-ib+1,ib, -one, work, & + ldwork, a( i+ib, i ), lda, one,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE','UNIT', i, ib-1,one, a( i+1, i )& + , lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_qaxpy( i, -one, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_qlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, n-i-ib+1, & + ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_qgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_qgehrd + + !> DGEJSV: computes the singular value decomposition (SVD) of a real M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^t, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and + !> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + !> DGEJSV can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_qgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,work, lwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + ! Local Scalars + real(qp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & + l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + ! Intrinsic Functions + intrinsic :: abs,log,max,min,real,idnint,sign,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& + then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & + .or. ( jracc .and. (.not.lsvec) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7,4*n+1,2*m+n))) .or.(& + .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7,4*n+n*n,2*m+n))) .or.(lsvec & + .and. (.not.rsvec) .and. (lwork < max(7,2*m+n,4*n+1))).or.(rsvec .and. (.not.lsvec) & + .and. (lwork < max(7,2*m+n,4*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& + lwork big ) then + info = - 9 + call stdlib_xerbla( 'DGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_qscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_qlaset( 'G', m, n1, zero, one, u, ldu ) + if ( rsvec ) call stdlib_qlaset( 'G', n, n, zero, one, v, ldv ) + work(1) = one + work(2) = one + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_qlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_qlacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_qgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) + call stdlib_qorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr ) + call stdlib_qcopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = one + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + work(1) = one / scalem + work(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + return + end if + transp = .false. + l2tran = l2tran .and. ( m == n ) + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^t and a^t * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_qlassq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_qlassq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + work(m+n+p) = xsc * scalem + work(n+p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, work(n+p) ) + if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) + end do + else + do p = 1, m + work(m+n+p) = scalem*abs( a(p,stdlib_iqamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, work(m+n+p) ) + aatmin = min( aatmin, work(m+n+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^t would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_qlassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=qp)) + ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^t * a. do the same with the + ! diagonal of a * a^t, compute the entropy of the corresponding + ! probability distribution. note that a * a^t and a^t * a have the + ! same trace. + entrat = zero + do p = n+1, n+m + big1 = ( ( work(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=qp)) + ! analyze the entropies and decide a or a^t. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^t is better than a, transpose a. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + do q = p + 1, n + temp1 = a(q,p) + a(q,p) = a(p,q) + a(p,q) = temp1 + end do + end do + do p = 1, n + work(m+n+p) = sva(p) + sva(p) = work(n+p) + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_qgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_qgesvj will compute them. so, in that case, + ! one should use stdlib_qgesvj instead of stdlib_qgejsv. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=qp) ) + call stdlib_qlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_qlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_qgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=qp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_qlacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_qscal( p, one/temp1, v(1,p), 1 ) + end do + call stdlib_qpocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_qlacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_qscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_qpocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else + call stdlib_qlacpy( 'U', n, n, a, lda, work(n+1), n ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_qscal( p, one/temp1, work(n+(p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + call stdlib_qpocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2*n+& + m+1), ierr ) + end if + sconda = one / sqrt(temp1) + ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_qcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=qp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_qlaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_qgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_qcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=qp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_qlaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_qgesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & + lwork, info ) + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_qcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + call stdlib_qlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_qgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) + + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_qlaset( 'LOWER', nr-1, nr-1, zero, zero, a(2,1), lda ) + call stdlib_qgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) + call stdlib_qlacpy( 'LOWER', nr, nr, a, lda, v, ldv ) + call stdlib_qlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_qgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_qcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + end do + call stdlib_qlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_qgesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & + lwork, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < n ) then + call stdlib_qlaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv ) + call stdlib_qlaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv ) + call stdlib_qlaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) + end if + call stdlib_qormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & + lwork-n, ierr ) + end if + do p = 1, n + call stdlib_qcopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + end do + call stdlib_qlacpy( 'ALL', n, n, a, lda, v, ldv ) + if ( transp ) then + call stdlib_qlacpy( 'ALL', n, n, v, ldv, u, ldu ) + end if + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_qcopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + end do + call stdlib_qlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_qgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),lwork-2*n, ierr ) + do p = 1, nr - 1 + call stdlib_qcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + end do + call stdlib_qlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_qgesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & + lwork-n, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < m ) then + call stdlib_qlaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_qlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu ) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + call stdlib_qormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_qnrm2( m, u(1,p), 1 ) + call stdlib_qscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_qlacpy( 'ALL', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_qgejsv. + do p = 1, nr + call stdlib_qcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + sign( temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_qlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_qlacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_qnrm2(nr-p+1,work(2*n+(p-1)*nr+p),1) + call stdlib_qscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1) + end do + call stdlib_qpocon('LOWER',nr,work(2*n+1),nr,one,temp1,work(2*n+nr*nr+1),iwork(m+& + 2*n+1),ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=qp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=qp)) + cond_ok = sqrt(real(nr,KIND=qp)) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^t = q2 * r2 + call stdlib_qgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + if ( nr /= n )call stdlib_qlacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_qcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + end do + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_qgeqp3 + ! should be replaced with eg. call sgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^t * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_qgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2*n+1), lwork-& + 2*n, ierr ) + ! * call stdlib_qgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + call stdlib_qlacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + v(p,q) = - sign( temp1, v(q,p) ) + end do + end do + else + call stdlib_qlaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_qgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),work(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_qlacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_qnrm2( p, work(2*n+n*nr+nr+p), nr ) + call stdlib_qscal( p, one/temp1, work(2*n+n*nr+nr+p), nr ) + end do + call stdlib_qpocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,work(2*n+n*nr+nr+& + nr*nr+1),iwork(m+2*n+1),ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_qlacpy( 'U', nr, nr, v, ldv, work(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + temp1 = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - sign( temp1, v(q,p) ) + v(p,q) = - sign( temp1, v(p,q) ) + end do + end do + else + call stdlib_qlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_qgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2*n+n*nr+nr+1),& + lwork-2*n-n*nr-nr,info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_qcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_qscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_qgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_qtrsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) + else + ! .. r1 is well conditioned, but non-square. transpose(r2) + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_qgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_qtrsm('L','U','T','N',nr,nr,one,work(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_qlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_qlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_qlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + end if + call stdlib_qormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! :) .. the input matrix a is very likely a relative of + ! the kahan matrix :) + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^t*v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_qgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_qcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_qscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_qtrsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_qlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_qlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_qlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_qormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_qgejsv completes the task. + ! compute the full svd of l3 using stdlib_qgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_qgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_qlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_qlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_qlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_qormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_qormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,work(2*n+n*nr+1), u, & + ldu, work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=qp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_qnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_qscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_qlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_qlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_qlaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_qormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=qp)) * epsln + do p = 1, nr + xsc = one / stdlib_qnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_qscal( m, xsc, & + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_qlacpy( 'UPPER', n, n, a, lda, work(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + temp1 = xsc * work( n + (p-1)*n + p ) + do q = 1, p - 1 + work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) + end do + end do + else + call stdlib_qlaset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) + end if + call stdlib_qgesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& + n*n+1), lwork-n-n*n, info ) + scalem = work(n+n*n+1) + numrank = nint(work(n+n*n+2),KIND=ilp) + do p = 1, n + call stdlib_qcopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_qscal( n, sva(p), work(n+(p-1)*n+1), 1 ) + end do + call stdlib_qtrsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& + 1), n ) + do p = 1, n + call stdlib_qcopy( n, work(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=qp))*epsln + do p = 1, n + xsc = one / stdlib_qnrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_qscal( n, xsc, & + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_qlaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_qlaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu ) + call stdlib_qlaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) + end if + end if + call stdlib_qormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + temp1 = sqrt(real(m,KIND=qp))*epsln + do p = 1, n1 + xsc = one / stdlib_qnrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_qscal( m, xsc, & + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values. since that is not always the case, ... + do p = 1, nr + call stdlib_qcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& + temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_qlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + call stdlib_qgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + call stdlib_qlacpy( 'L', n, nr, v, ldv, work(2*n+1), n ) + do p = 1, nr + call stdlib_qcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) + u(p,q) = - sign( temp1, u(q,p) ) + end do + end do + else + call stdlib_qlaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu ) + end if + call stdlib_qgesvj( 'G', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2*n+n*nr+1), & + lwork-2*n-n*nr, info ) + scalem = work(2*n+n*nr+1) + numrank = nint(work(2*n+n*nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_qlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_qlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_qlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_qormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=qp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_qnrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_qscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_qlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_qlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu ) + call stdlib_qlaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_qormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^t + do p = 1, n + call stdlib_qswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_qlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + work(1) = uscal2 * scalem + work(2) = uscal1 + if ( errest ) work(3) = sconda + if ( lsvec .and. rsvec ) then + work(4) = condr1 + work(5) = condr2 + end if + if ( l2tran ) then + work(6) = entra + work(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + return + end subroutine stdlib_qgejsv + + !> DGELQ: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_qgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'DGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'DGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if ( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_qgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_qlaswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_qgelq + + !> DGELQ2: computes an LQ factorization of a real m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_qgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGELQF: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_qgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb DGELQT: computes a blocked LQ factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_qgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. ( mb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda DGELQT3: recursively computes a LQ factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_qgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_qlarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_qgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = q1^h a(j1:m,1:n) [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_qtrmm( 'R', 'U', 'T', 'U', m2, m1, one,a, lda, t( i1, 1 ), ldt ) + call stdlib_qgemm( 'N', 'T', m2, m1, n-m1, one, a( i1, i1 ), lda,a( 1, i1 ), lda, & + one, t( i1, 1 ), ldt) + call stdlib_qtrmm( 'R', 'U', 'N', 'N', m2, m1, one,t, ldt, t( i1, 1 ), ldt ) + call stdlib_qgemm( 'N', 'N', m2, n-m1, m1, -one, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + one, a( i1, i1 ), lda ) + call stdlib_qtrmm( 'R', 'U', 'N', 'U', m2, m1 , one,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )=0 + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_qgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_qtrmm( 'R', 'U', 'T', 'U', m1, m2, one,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_qgemm( 'N', 'T', m1, m2, n-m, one, a( 1, j1 ), lda,a( i1, j1 ), lda, & + one, t( 1, i1 ), ldt ) + call stdlib_qtrmm( 'L', 'U', 'N', 'N', m1, m2, -one, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_qtrmm( 'R', 'U', 'N', 'N', m1, m2, one,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_qgelqt3 + + !> DGELS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, or its transpose, using a QR or LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an underdetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_qgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(qp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(qp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'DORMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'DORMQR', 'LT', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'DORMLQ', 'LT', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'DORMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn+max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=qp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_qlaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'P' ) + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_qlange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_qlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_qlange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_qgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_qormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_qtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_qtrtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_qormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_qgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_qtrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_qormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_qormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_qtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_qlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_qlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=qp) + return + end subroutine stdlib_qgels + + !> DGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_qgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & + maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd + real(qp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: real,int,log,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + mnthr = stdlib_ilaenv( 6, 'DGELSD', ' ', m, n, nrhs, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns. + mm = n + maxwrk = max( maxwrk, n+n*stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n,-1, -1 ) ) + + maxwrk = max( maxwrk, n+nrhs*stdlib_ilaenv( 1, 'DORMQR', 'LT', m, nrhs, n, -1 ) ) + + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + maxwrk = max( maxwrk, 3*n+( mm+n )*stdlib_ilaenv( 1, 'DGEBRD', ' ', mm, n, -1, -& + 1 ) ) + maxwrk = max( maxwrk, 3*n+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', mm, nrhs, n, -& + 1 ) ) + maxwrk = max( maxwrk, 3*n+( n-1 )*stdlib_ilaenv( 1, 'DORMBR', 'PLN', n, nrhs, n, & + -1 ) ) + wlalsd = 9*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2 + maxwrk = max( maxwrk, 3*n+wlalsd ) + minwrk = max( 3*n+mm, 3*n+nrhs, 3*n+wlalsd ) + end if + if( n>m ) then + wlalsd = 9*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2 + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 ) + maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib_ilaenv( 1, 'DGEBRD', ' ', m, m, -1, -& + 1 ) ) + maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', m, nrhs,& + m, -1 ) ) + maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib_ilaenv( 1, 'DORMBR', 'PLN', m, & + nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m+m+m*nrhs ) + else + maxwrk = max( maxwrk, m*m+2*m ) + end if + maxwrk = max( maxwrk, m+nrhs*stdlib_ilaenv( 1, 'DORMLQ', 'LT', n, nrhs, m, -1 & + ) ) + maxwrk = max( maxwrk, m*m+4*m+wlalsd ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - remaining underdetermined cases. + maxwrk = 3*m + ( n+m )*stdlib_ilaenv( 1, 'DGEBRD', ' ', m, n,-1, -1 ) + maxwrk = max( maxwrk, 3*m+nrhs*stdlib_ilaenv( 1, 'DORMBR', 'QLT', m, nrhs, n, & + -1 ) ) + maxwrk = max( maxwrk, 3*m+m*stdlib_ilaenv( 1, 'DORMBR', 'PLN', n, nrhs, m, -1 & + ) ) + maxwrk = max( maxwrk, 3*m+wlalsd ) + end if + minwrk = max( 3*m+nrhs, 3*m+m, 3*m+wlalsd ) + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_qlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_qlaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_qlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_qlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure certain entries of b are zero. + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns. + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_qormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a. + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_qgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_qlalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_qormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m, wlalsd ) ) & + then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4*m+m*lda+& + wlalsd ) )ldwork = lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_qlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il). + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_qgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_qlalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_qormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_qlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_qormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a. + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_qlalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_qormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_qlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_qlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + return + end subroutine stdlib_qgelsd + + !> DGELSS: computes the minimum norm solution to a real linear least + !> squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_qgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_qgeqrf, lwork_qormqr, lwork_qgebrd, lwork_qormbr, lwork_qorgbr, & + lwork_qormlq, lwork_qgelqf + real(qp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'DGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_qgeqrf + call stdlib_qgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_qgeqrf=dum(1) + ! compute space needed for stdlib_qormqr + call stdlib_qormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_qormqr=dum(1) + mm = n + maxwrk = max( maxwrk, n + lwork_qgeqrf ) + maxwrk = max( maxwrk, n + lwork_qormqr ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute workspace needed for stdlib_qbdsqr + bdspac = max( 1, 5*n ) + ! compute space needed for stdlib_qgebrd + call stdlib_qgebrd( mm, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, info & + ) + lwork_qgebrd=dum(1) + ! compute space needed for stdlib_qormbr + call stdlib_qormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_qormbr=dum(1) + ! compute space needed for stdlib_qorgbr + call stdlib_qorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_qorgbr=dum(1) + ! compute total workspace needed + maxwrk = max( maxwrk, 3*n + lwork_qgebrd ) + maxwrk = max( maxwrk, 3*n + lwork_qormbr ) + maxwrk = max( maxwrk, 3*n + lwork_qorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = max( 3*n + mm, 3*n + nrhs, bdspac ) + maxwrk = max( minwrk, maxwrk ) + end if + if( n>m ) then + ! compute workspace needed for stdlib_qbdsqr + bdspac = max( 1, 5*m ) + minwrk = max( 3*m+nrhs, 3*m+n, bdspac ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_qgelqf + call stdlib_qgelqf( m, n, a, lda, dum(1), dum(1),-1, info ) + lwork_qgelqf=dum(1) + ! compute space needed for stdlib_qgebrd + call stdlib_qgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_qgebrd=dum(1) + ! compute space needed for stdlib_qormbr + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_qormbr=dum(1) + ! compute space needed for stdlib_qorgbr + call stdlib_qorgbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_qorgbr=dum(1) + ! compute space needed for stdlib_qormlq + call stdlib_qormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_qormlq=dum(1) + ! compute total workspace needed + maxwrk = m + lwork_qgelqf + maxwrk = max( maxwrk, m*m + 4*m + lwork_qgebrd ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_qormbr ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_qorgbr ) + maxwrk = max( maxwrk, m*m + m + bdspac ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_qormlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_qgebrd + call stdlib_qgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_qgebrd=dum(1) + ! compute space needed for stdlib_qormbr + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_qormbr=dum(1) + ! compute space needed for stdlib_qorgbr + call stdlib_qorgbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_qorgbr=dum(1) + maxwrk = 3*m + lwork_qgebrd + maxwrk = max( maxwrk, 3*m + lwork_qormbr ) + maxwrk = max( maxwrk, 3*m + lwork_qorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_qlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_qlaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_qormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_qlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_qgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_qrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_qlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_qlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_qgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_qlacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_qgemv( 'T', n, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_qcopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_qgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of l in work(il) and + ! multiplying b by transpose of left singular vectors + ! (workspace: need m*m+m+bdspac) + call stdlib_qbdsqr( 'U', m, m, 0, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& + ldb, work( iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_qrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_qlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + iwork = ie + ! multiply b by right singular vectors of l in work(il) + ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_qgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & + work( iwork ), ldb ) + call stdlib_qlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_qgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1, i ), ldb,& + zero, work( iwork ), m ) + call stdlib_qlacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_qgemv( 'T', m, m, one, work( il ), ldwork, b( 1, 1 ),1, zero, work( & + iwork ), 1 ) + call stdlib_qcopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_qlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_qormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_qormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_qorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'L', m, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_qrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_qlaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_qgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_qlacpy( 'F', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_qgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_qlacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_qgemv( 'T', m, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_qcopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_qlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_qlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_qgelss + + !> DGELSY: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by orthogonal transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**T [ inv(T11)*Q1**T*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + + subroutine stdlib_qgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & + nb3, nb4 + real(qp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & + wsize + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_qlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_qlange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_qgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, info ) + + wsize = mn + work( mn+1 ) + ! workspace: mn+2*n+nb*(n+1). + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = one + work( ismax ) = one + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_qlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank DGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by short wide LQ + !> factorization (DGELQ) + + pure subroutine stdlib_qgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(qp), intent(in) :: a(lda,*), t(*) + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_qgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_qlamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_qgemlq + + !> DGEMLQT: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by DGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_qgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + real(qp), intent(in) :: v(ldv,*), t(ldt,*) + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv DGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (DGEQR) + + pure subroutine stdlib_qgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(qp), intent(in) :: a(lda,*), t(*) + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_qgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_qlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_qgemqr + + !> DGEMQRT: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by DGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_qgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + real(qp), intent(in) :: v(ldv,*), t(ldt,*) + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv DGEQL2: computes a QL factorization of a real m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_qgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQLF: computes a QL factorization of a real M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_qgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_qlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**t to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_qlarfb( 'LEFT', 'TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-1, & + n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_qgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_qgeqlf + + !> DGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_qgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_qgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_qgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb DGEQR: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_qgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'DGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'DGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_qgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_qlatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_qgeqr + + !> DGEQR2: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_qgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQR2P: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_qgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGEQRF: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_qgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb DGEQR2P computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_qgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb DGEQRT: computes a blocked QR factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_qgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda DGEQRT2: computes a QR factorization of a real M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_qgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_qlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i DGEQRT3: recursively computes a QR factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_qgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_qlarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_qgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_qtrmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + call stdlib_qgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1 ), lda,a( j1, j1 ), lda, & + one, t( 1, j1 ), ldt) + call stdlib_qtrmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1, j1 ), ldt ) + call stdlib_qgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + one, a( j1, j1 ), lda ) + call stdlib_qtrmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_qgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = (a( j+n1, i )) + end do + end do + call stdlib_qtrmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_qgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1 ), lda,a( i1, j1 ), lda, & + one, t( 1, j1 ), ldt ) + call stdlib_qtrmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_qtrmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_qgeqrt3 + + !> DGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_qgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_qgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + call stdlib_qaxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qgetrs( transt, n, 1, af, ldaf, ipiv, work( n+1 ),n, info ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_qgerfs + + !> DGERQ2: computes an RQ factorization of a real m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_qgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DGERQF: computes an RQ factorization of a real M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_qgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_qlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_qlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_qgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_qgerqf + + !> DGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by DGETC2. + + pure subroutine stdlib_qgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, eps, smlnum, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! set constant to control overflow + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_qlaswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_iqamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = ( one / two ) / abs( rhs( i ) ) + call stdlib_qscal( n, temp, rhs( 1 ), 1 ) + scale = scale*temp + end if + do i = n, 1, -1 + temp = one / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_qlaswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_qgesc2 + + !> DGESDD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and right singular + !> vectors. If singular vectors are desired, it uses a + !> divide-and-conquer algorithm. + !> The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**T, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_qgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & + ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl + integer(ilp) :: lwork_qgebrd_mn, lwork_qgebrd_mm, lwork_qgebrd_nn, lwork_qgelqf_mn, & + lwork_qgeqrf_mn, lwork_qorgbr_p_mm, lwork_qorgbr_q_nn, lwork_qorglq_mn, & + lwork_qorglq_nn, lwork_qorgqr_mm, lwork_qorgqr_mn, lwork_qormbr_prt_mm, & + lwork_qormbr_qln_mm, lwork_qormbr_prt_mn, lwork_qormbr_qln_mn, lwork_qormbr_prt_nn, & + lwork_qormbr_qln_nn + real(qp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! compute space needed for stdlib_qbdsdc + if( wntqn ) then + ! stdlib_qbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_qp) + ! keep 7*n for backwards compatibility. + bdspac = 7*n + else + bdspac = 3*n*n + 4*n + end if + ! compute space preferred for each routine + call stdlib_qgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_qgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_qgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_qgebrd_nn = int( dum(1),KIND=ilp) + call stdlib_qgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_qgeqrf_mn = int( dum(1),KIND=ilp) + call stdlib_qorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,ierr ) + lwork_qorgbr_q_nn = int( dum(1),KIND=ilp) + call stdlib_qorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_qorgqr_mm = int( dum(1),KIND=ilp) + call stdlib_qorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_qorgqr_mn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_qormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_qormbr_qln_nn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_qormbr_qln_mn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_qormbr_qln_mm = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + wrkbl = n + lwork_qgeqrf_mn + wrkbl = max( wrkbl, 3*n + lwork_qgebrd_nn ) + maxwrk = max( wrkbl, bdspac + n ) + minwrk = bdspac + n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_qgeqrf_mn + wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + 2*n*n + minwrk = bdspac + 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_qgeqrf_mn + wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = bdspac + n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_qgeqrf_mn + wrkbl = max( wrkbl, n + lwork_qorgqr_mm ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = n*n + max( 3*n + bdspac, n + m ) + end if + else + ! path 5 (m >= n, but not much larger) + wrkbl = 3*n + lwork_qgebrd_mn + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*n + max( m, n*n + bdspac ) + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + wrkbl = max( wrkbl, 3*n + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*n + lwork_qormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + end if + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_qbdsdc + if( wntqn ) then + ! stdlib_qbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_qp) + ! keep 7*n for backwards compatibility. + bdspac = 7*m + else + bdspac = 3*m*m + 4*m + end if + ! compute space preferred for each routine + call stdlib_qgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_qgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_qgebrd( m, m, a, m, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_qgebrd_mm = int( dum(1),KIND=ilp) + call stdlib_qgelqf( m, n, a, m, dum(1), dum(1), -1, ierr ) + lwork_qgelqf_mn = int( dum(1),KIND=ilp) + call stdlib_qorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_qorglq_nn = int( dum(1),KIND=ilp) + call stdlib_qorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr ) + lwork_qorglq_mn = int( dum(1),KIND=ilp) + call stdlib_qorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr ) + lwork_qorgbr_p_mm = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'P', 'R', 'T', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_qormbr_prt_mm = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'P', 'R', 'T', m, n, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_qormbr_prt_mn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'P', 'R', 'T', n, n, m, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_qormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_qormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_qormbr_qln_mm = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + wrkbl = m + lwork_qgelqf_mn + wrkbl = max( wrkbl, 3*m + lwork_qgebrd_mm ) + maxwrk = max( wrkbl, bdspac + m ) + minwrk = bdspac + m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_qgelqf_mn + wrkbl = max( wrkbl, m + lwork_qorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + 2*m*m + minwrk = bdspac + 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_qgelqf_mn + wrkbl = max( wrkbl, m + lwork_qorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = bdspac + m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_qgelqf_mn + wrkbl = max( wrkbl, m + lwork_qorglq_nn ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = m*m + max( 3*m + bdspac, m + n ) + end if + else + ! path 5t (n > m, but not much larger) + wrkbl = 3*m + lwork_qgebrd_mn + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_mn ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*m + max( n, m*m + bdspac ) + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_mn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + wrkbl = max( wrkbl, 3*m + lwork_qormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_qormbr_prt_nn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = stdlib_qroundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! workspace: need n [tau] + n [work] + ! workspace: prefer n [tau] + n*nb [work] + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out below r + call stdlib_qlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_qgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + n + ! perform bidiagonal svd, computing singular values only + ! workspace: need n [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz = 'o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is ldwrkr by n + if( lwork >= lda*n + n*n + 3*n + bdspac ) then + ldwrkr = lda + else + ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_qlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_qgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! work(iu) is n by n + iu = nwork + nwork = iu + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r + ! and vt by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), n, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_qgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu ), & + n, zero, work( ir ),ldwrkr ) + call stdlib_qlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_qlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_qgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagoal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! workspace: need n*n [r] + call stdlib_qlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_qgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& + ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! workspace: need n*n [u] + n [tau] + n [work] + ! workspace: prefer n*n [u] + n [tau] + n*nb [work] + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! workspace: need n*n [u] + n [tau] + m [work] + ! workspace: prefer n*n [u] + n [tau] + m*nb [work] + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce r in a, zeroing out other entries + call stdlib_qlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_qgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! workspace: need n*n [u] + call stdlib_qgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& + lda ) + ! copy left singular vectors of a from a to u + call stdlib_qlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr + ! path 5 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + iu = nwork + if( lwork >= m*n + 3*n + bdspac ) then + ! work( iu ) is m by n + ldwrku = m + nwork = iu + ldwrku*n + call stdlib_qlaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) + ! ir is unused; silence compile warnings + ir = -1 + else + ! work( iu ) is n by n + ldwrku = n + nwork = iu + ldwrku*n + ! work(ir) is ldwrkr by n + ir = nwork + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & + dum, idum, work( nwork ),iwork, info ) + ! overwrite vt by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*n + bdspac ) then + ! path 5o-fast + ! overwrite work(iu) by left singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + ! copy left singular vectors of a from work(iu) to a + call stdlib_qlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 5o-slow + ! generate q in a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_qorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of + ! bidiagonal matrix in work(iu), storing result in + ! work(ir) and copying to a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_qgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu )& + , ldwrku, zero,work( ir ), ldwrkr ) + call stdlib_qlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_qlaset( 'F', m, n, zero, zero, u, ldu ) + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_qlaset( 'F', m, m, zero, zero, u, ldu ) + call stdlib_qbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of u to identity matrix + if( m>n ) then + call stdlib_qlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! workspace: need m [tau] + m [work] + ! workspace: prefer m [tau] + m*nb [work] + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out above l + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_qgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + m + ! perform bidiagonal svd, computing singular values only + ! workspace: need m [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ! work(il) is m by m; it is later resized to m by chunk for gemm + il = ivt + m*m + if( lwork >= m*n + m*m + 3*m + bdspac ) then + ldwrkl = m + chunk = n + else + ldwrkl = m + chunk = ( lwork - m*m ) / m + end if + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_qlaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_qgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u, and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & + idum, work( nwork ),iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), m,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by q + ! in a, storing result in work(il) and copying to a + ! workspace: need m*m [vt] + m*m [l] + ! workspace: prefer m*m [vt] + m*n [l] + ! at this point, l is resized as m by chunk. + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_qgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1, i ), lda,& + zero, work( il ), ldwrkl ) + call stdlib_qlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_qlaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(iu). + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_qgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of l and vt + ! by right singular vectors of l + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(il) by + ! q in a, storing result in vt + ! workspace: need m*m [l] + call stdlib_qlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! workspace: need m*m [vt] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! workspace: need m*m [vt] + m [tau] + n [work] + ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce l in a, zeroing out other entries + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_qgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! workspace: need m*m [vt] + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& + a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_qlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr + ! path 5t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_qbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m + bdspac ) then + ! work( ivt ) is m by n + call stdlib_qlaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + ! il is unused; silence compile warnings + il = -1 + else + ! work( ivt ) is m by m + nwork = ivt + ldwkvt*m + il = nwork + ! work(il) is m by chunk + chunk = ( lwork - m*m - 3*m ) / m + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac + call stdlib_qbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*m + bdspac ) then + ! path 5to-fast + ! overwrite work(ivt) by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] + call stdlib_qormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! copy right singular vectors of a from work(ivt) to a + call stdlib_qlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 5to-slow + ! generate p**t in a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_qorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by right singular vectors of + ! bidiagonal matrix in work(ivt), storing result in + ! work(il) and copying to a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_qgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1, & + i ), lda, zero,work( il ), m ) + call stdlib_qlacpy( 'F', m, blk, work( il ), m, a( 1, i ),lda ) + end do + end if + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_qlaset( 'F', m, n, zero, zero, vt, ldvt ) + call stdlib_qbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_qlaset( 'F', n, n, zero, zero, vt, ldvt ) + call stdlib_qbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of vt to identity matrix + if( n>m ) then + call stdlib_qlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] + call stdlib_qormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_qormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( anrm DGESV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_qgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda DGESVD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**T, not V. + + subroutine stdlib_qgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_qgeqrf, lwork_qorgqr_n, lwork_qorgqr_m, lwork_qgebrd, & + lwork_qorgbr_p, lwork_qorgbr_q, lwork_qgelqf, lwork_qorglq_n, lwork_qorglq_m + real(qp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! compute space needed for stdlib_qbdsqr + mnthr = stdlib_ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*n + ! compute space needed for stdlib_qgeqrf + call stdlib_qgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_qgeqrf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorgqr + call stdlib_qorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_qorgqr_n = int( dum(1),KIND=ilp) + call stdlib_qorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_qorgqr_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qgebrd + call stdlib_qgebrd( n, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_qgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorgbr p + call stdlib_qorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorgbr q + call stdlib_qorgbr( 'Q', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_q = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_qgeqrf + maxwrk = max( maxwrk, 3*n + lwork_qgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3*n + lwork_qorgbr_p ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*n, bdspac ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n + wrkbl, n*n + m*n + n ) + minwrk = max( 3*n + m, bdspac ) + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n + wrkbl, n*n + m*n + n ) + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_n ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_qgeqrf + wrkbl = max( wrkbl, n + lwork_qorgqr_m ) + wrkbl = max( wrkbl, 3*n + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_q ) + wrkbl = max( wrkbl, 3*n + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n + m, bdspac ) + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_qgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_qgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*n + lwork_qgebrd + if( wntus .or. wntuo ) then + call stdlib_qorgbr( 'Q', m, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n + lwork_qorgbr_q ) + end if + if( wntua ) then + call stdlib_qorgbr( 'Q', m, m, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n + lwork_qorgbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 3*n + lwork_qorgbr_p ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*n + m, bdspac ) + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_qbdsqr + mnthr = stdlib_ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*m + ! compute space needed for stdlib_qgelqf + call stdlib_qgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_qgelqf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorglq + call stdlib_qorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_qorglq_n = int( dum(1),KIND=ilp) + call stdlib_qorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr ) + lwork_qorglq_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qgebrd + call stdlib_qgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_qgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorgbr p + call stdlib_qorgbr( 'P', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_qorgbr q + call stdlib_qorgbr( 'Q', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_q = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_qgelqf + maxwrk = max( maxwrk, 3*m + lwork_qgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3*m + lwork_qorgbr_q ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*m, bdspac ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m + wrkbl, m*m + m*n + m ) + minwrk = max( 3*m + n, bdspac ) + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m + wrkbl, m*m + m*n + m ) + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_m ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_qgelqf + wrkbl = max( wrkbl, m + lwork_qorglq_n ) + wrkbl = max( wrkbl, 3*m + lwork_qgebrd ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_p ) + wrkbl = max( wrkbl, 3*m + lwork_qorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m + n, bdspac ) + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_qgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_qgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*m + lwork_qgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_qorgbr p + call stdlib_qorgbr( 'P', m, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m + lwork_qorgbr_p ) + end if + if( wntva ) then + call stdlib_qorgbr( 'P', n, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_qorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m + lwork_qorgbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 3*m + lwork_qorgbr_q ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*m + n, bdspac ) + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + iwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_qlacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_qgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_qlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (workspace: need 4*n, prefer 3*n + n*nb) + call stdlib_qorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,a, lda, dum, 1, & + work( iwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_qlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_qgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_qlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_qlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,a, lda, dum, & + 1, work( iwork ), info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda + n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n + 4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_qgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n + 4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n + bdspac) + call stdlib_qbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + ! copy right singular vectors of r to a + ! (workspace: need n*n) + call stdlib_qlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n + 4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_qlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_qlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda + n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n + 4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_qgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n + 4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n + bdspac) + call stdlib_qbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_qlacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_qlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) + call stdlib_qorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n + 4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n + bdspac) + call stdlib_qbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_qgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_qlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n + n*nb) + call stdlib_qgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n + m, prefer n + m*nb) + call stdlib_qorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_qlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_qlaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n + 2*n*nb) + call stdlib_qgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n + m, prefer 3*n + m*nb) + call stdlib_qormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) + call stdlib_qlacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_qorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_qorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*n, prefer 3*n + n*nb) + call stdlib_qorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) + call stdlib_qorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + iwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,lda, dum, 1, & + work( iwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_qlacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, dum,& + 1, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_qgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_qlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_qlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, m, 0, s, work( ie ),work( ir ), ldwrkr, u, & + ldu, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_qgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_qlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,u, ldu, dum, 1, & + work( iwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda + m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_qlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m + 4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_qgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m + 4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (workspace: need m*m) + call stdlib_qlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, compute left + ! singular vectors of a in a and compute right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m + 4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in vt + ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need m*m + 4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_qlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda + m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m + 4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_qgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m + 4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_qlacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_qlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n + m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_qlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) + call stdlib_qorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m + bdspac) + call stdlib_qbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_qgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_qlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m + m*nb) + call stdlib_qgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m + n, prefer m + n*nb) + call stdlib_qorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_qlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_qlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m + 2*m*nb) + call stdlib_qgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m + n, prefer 3*m + n*nb) + call stdlib_qormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) + call stdlib_qgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) + call stdlib_qlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_qorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) + call stdlib_qlacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_qorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) + call stdlib_qorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m + m*nb) + call stdlib_qorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_qbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + end if + ! if stdlib_qbdsqr failed to converge, copy unconverged superdiagonals + ! to work( 2:minmn ) + if( info/=0 ) then + if( ie>2 ) then + do i = 1, minmn - 1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + if( ie<2 ) then + do i = minmn - 1, 1, -1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1, work( 2 ),minmn, ierr ) + if( anrm DGESVDQ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_qgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lwork + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: u(ldu,*), v(ldv,*), work(*) + real(qp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ierr, iwoff, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_qgelqf, lwrk_qgesvd, lwrk_qgesvd2, lwrk_qgeqp3, & + lwrk_qgeqrf, lwrk_qormlq, lwrk_qormqr, lwrk_qormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lworq, lworq2, lworlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(qp) :: big, epsln, rtmp, sconda, sfmin + ! Local Arrays + real(qp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + if ( conda ) then + iminwrk = max( 1, n + m - 1 + n ) + else + iminwrk = max( 1, n + m - 1 ) + end if + rminwrk = max( 2, m ) + else + if ( conda ) then + iminwrk = max( 1, n + n ) + else + iminwrk = max( 1, n ) + end if + rminwrk = 2 + end if + lquery = (liwork == -1 .or. lwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=qp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_qlascl('G',0,0,sqrt(real(m,KIND=qp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_qlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_qlange( 'M', m, n, a, lda, rdummy ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = -8 + call stdlib_xerbla( 'DGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=qp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_qlascl('G',0,0, sqrt(real(m,KIND=qp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_qgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_qlamch('E') + sfmin = stdlib_qlamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=qp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_qlamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_qlacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_qnrm2( p, v(1,p), 1 ) + call stdlib_qscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_qpocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & + ) + else + call stdlib_qpocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & + ierr ) + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**t = [a](1:nr,1:n)**t + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + do q = p + 1, n + a(q,p) = a(p,q) + if ( q <= nr ) a(p,q) = zero + end do + end do + call stdlib_qgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_qlaset( 'L', nr-1,nr-1, zero,zero, a(2,1), lda ) + call stdlib_qgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_qgesvd to r**t + ! .. copy r**t into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_qlaset( 'U', nr-1,nr-1, zero,zero, u(1,2), ldu ) + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_qgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + else + ! Apply Stdlib_Dgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_qlacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_qlaset( 'L', nr-1, nr-1, zero, zero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_qgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_qlaset('A', m-nr, nr, zero, zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_qlaset( 'A',nr,n1-nr,zero,zero,u(1,nr+1), ldu ) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_qormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_qgesvd to r**t + ! .. copy r**t into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = (a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_qlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_qgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_qlapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_qlaset('G', n, n-nr, zero, zero, v(1,nr+1), ldv) + call stdlib_qgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Dgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_qlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_qlaset( 'L', nr-1, nr-1, zero, zero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_qgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_qlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_qlaset('G', n-nr, n, zero,zero, v(nr+1,1), ldv) + call stdlib_qgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the transposed matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_qgesvd to r**t [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_qlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite [v], the nr right + ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed + call stdlib_qgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + ! Assemble V + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_qlapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_qlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_qlaset('U',nr-1,nr-1, zero,zero, v(1,2),ldv) + + call stdlib_qlaset('A',n,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_qgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + do q = p + 1, n + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_qlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_qlaset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**t into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_qlaset('U',nr-1,nr-1,zero,zero,u(1,nr+2),ldu) + + call stdlib_qgeqrf( n, nr, u(1,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& + n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = u(p,nr+q) + end do + end do + call stdlib_qlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_qgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& + ,lwork-n-nr, info ) + call stdlib_qlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_qlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_qlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_qormqr('R','C', n, n, nr, u(1,nr+1), ldu,work(n+1),v,ldv,work(& + n+nr+1),lwork-n-nr,ierr) + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_qlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_qgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_qlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_qlaset( 'L', nr-1,nr-1, zero,zero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_qgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_qlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_qlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_qlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_qlaset('L', nr-1,nr-1, zero,zero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_qlaset('A', n-nr,n, zero,zero, v(nr+1,1),ldv) + call stdlib_qgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the transposed matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_qlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_qlaset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_qlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_qlaset('L',nr-1,nr-1,zero,zero,u(nr+2,1),ldu) + + call stdlib_qgelqf( nr, n, u(nr+1,1), ldu, work(n+1),work(n+nr+1), lwork-n-& + nr, ierr ) + call stdlib_qlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_qlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_qgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& + 1), lwork-n-nr, info ) + call stdlib_qlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_qlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_qlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_qormlq('R','N',n,n,nr,u(nr+1,1),ldu,work(n+1),v, ldv, work(n+& + nr+1),lwork-n-nr,ierr) + call stdlib_qlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_qlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_qlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_qlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + end if + end if + ! .. end of the "r**t or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_qormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_qlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_qlaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_qlascl( 'G',0,0, one,sqrt(real(m,KIND=qp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_qgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_qgesvdq + + !> DGESVJ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + !> DGESVJ can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_qgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) + real(qp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + ! Local Scalars + real(qp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & + temp1, theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Local Arrays + real(qp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = work( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=qp) ) + else + ctol = real( m,KIND=qp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_qlamch() works properly on the target machine.) + epsln = stdlib_qlamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_qlamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + big = stdlib_qlamch( 'OVERFLOW' ) + ! big = one / sfmin + rootbig = one / rootsfmin + large = big / sqrt( real( m*n,KIND=qp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=qp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_qlaset( 'A', mvl, n, zero, one, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl= one / sqrt( real( m,KIND=qp)*real( n,KIND=qp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_qlassq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_qlassq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_qlassq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'DGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl= one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_qlaset( 'G', m, n, zero, one, a, lda ) + work( 1 ) = one + work( 2 ) = zero + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_qlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + work( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + work( 2 ) = one + else + work( 2 ) = zero + end if + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=qp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=qp) ) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=qp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_qlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl= temp1*skl + if( skl/=one ) then + call stdlib_qlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl= one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + fastr( 1 ) = zero + ! a is represented in factored form a = a * diag(work), where diag(work) + ! is initialized to identity. work is updated during fast scaled + ! rotations. + do q = 1, n + work( q ) = one + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_qgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_qgesvj. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_qgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, work( n+1 ), & + lwork-n, ierr ) + call stdlib_qgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,work( n+1 ), lwork-n, & + ierr ) + call stdlib_qgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_qgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_qgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, work( n+1 ), lwork-n,ierr ) + call stdlib_qgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& + tol, 1, work( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_qgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, work( n+1 ), lwork-n,ierr ) + call stdlib_qgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, work( n+1 ), lwork-n,ierr ) + + call stdlib_qgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, work( n+1 ),lwork-n, ierr ) + call stdlib_qgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& + mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_qswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_qswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! caveat: + ! unfortunately, some blas implementations compute stdlib_qnrm2(m,a(1,p),1) + ! as sqrt(stdlib_qdot(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_qnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_qnrm2 is available, the if-then-else + ! below should read "aapp = stdlib_qnrm2( m, a(1,p), 1 ) * work(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_qnrm2( m, a( 1, p ), 1 )*work( p ) + else + temp1 = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*work( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_qcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_qdot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_qcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_qdot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq)/aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( work( q )>=one ) then + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_qaxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( work( p )>=work( q ) )then + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_qaxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_qcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, one, m,1, work( n+1 ), & + lda,ierr ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_qaxpy( m, temp1, work( n+1 ), 1,a( 1, q ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qnrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_qlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qnrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_qcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_qdot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_qcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_qdot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq)/aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + end if + else + if( work( q )>=one ) then + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_qaxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + else + if( work( p )>=work( q ) )then + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_qaxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_qcopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_qaxpy( m, temp1, work( n+1 ),1, a( 1, q ), 1 & + ) + call stdlib_qlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_qcopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*work( q ) / work( p ) + call stdlib_qaxpy( m, temp1, work( n+1 ),1, a( 1, p ), 1 & + ) + call stdlib_qlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qnrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_qlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qnrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_qnrm2( m, a( 1, n ), 1 )*work( n ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*work( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + call stdlib_qswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_qswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n2 + call stdlib_qscal( m, work( p ) / sva( p ), a( 1, p ), 1 ) + end do + end if + ! scale the product of jacobi rotations (assemble the fast rotations). + if( rsvec ) then + if( applv ) then + do p = 1, n + call stdlib_qscal( mvl, work( p ), v( 1, p ), 1 ) + end do + else + do p = 1, n + temp1 = one / stdlib_qnrm2( mvl, v( 1, p ), 1 ) + call stdlib_qscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl) ) ).or. ( ( skl( sfmin / skl) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl= one + end if + work( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + work( 2 ) = real( n4,KIND=qp) + ! n4 is the number of computed nonzero singular values of a. + work( 3 ) = real( n2,KIND=qp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 DGESVX: uses the LU factorization to compute the solution to a real + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_qgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*), c(*), r(*) + real(qp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(qp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_qlantr( 'M', 'U', 'N', info, info, af, ldaf,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_qlange( 'M', n, info, a, lda, work ) / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_qlange( norm, n, n, a, lda, work ) + rpvgrw = stdlib_qlantr( 'M', 'U', 'N', n, n, af, ldaf, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_qlange( 'M', n, n, a, lda, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_qgecon( norm, n, af, ldaf, anorm, rcond, work, iwork, info ) + ! compute the solution matrix x. + call stdlib_qlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_qgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_qgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + work( 1 ) = rpvgrw + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DGETC2: computes an LU factorization with complete pivoting of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is the Level 2 BLAS algorithm. + + pure subroutine stdlib_qgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(qp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_qswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_qswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) DGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_qgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_qscal( m-j, one / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j DGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_qgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_qgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_qgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_qlaswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_qlaswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_qtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_qgetrf + + !> DGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_qgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: sfmin, temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_qscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_qgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_qlaswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_qtrsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_qgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_qgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_qlaswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_qgetrf2 + + !> DGETRI: computes the inverse of a matrix using the LU factorization + !> computed by DGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_qgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'DGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_qtrtri, then u is singular, + ! and the inverse is not computed. + call stdlib_qtrtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = zero + end do + ! compute current column of inv(a). + if( j DGETRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by DGETRF. + + pure subroutine stdlib_qgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DGETSLS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_qgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(qp) :: anrm, bignum, bnrm, smlnum, tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'T' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_qgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_qgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_qgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_qgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_qgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_qgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_qgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_qgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_qlaset( 'F', maxmn, nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_qlange( 'M', brow, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_qlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_qgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_qgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_qtrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_qtrtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_qgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_qgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_qtrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_qgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_qgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_qtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_qlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_qlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_qlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=qp) + return + end subroutine stdlib_qgetsls + + !> DGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a real M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in DGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of DGEQRT for more details on the format. + + pure subroutine stdlib_qgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,real,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m DGGBAK: forms the right or left eigenvectors of a real generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> DGGBAL. + + pure subroutine stdlib_qggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(in) :: lscale(*), rscale(*) + real(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max,int + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv DGGBAL: balances a pair of general real matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_qggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: lscale(*), rscale(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sclfac = 1.0e+1_qp + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(qp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + ! Intrinsic Functions + intrinsic :: abs,real,int,log10,max,min,sign + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax DGGES: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> DGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_qgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alphar, & + alphai, beta, vsl, ldvsl, vsr,ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_q) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, maxwrk, minwrk + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( lda0 )then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'DGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORGQR', ' ', n, 1, n, & + -1 ) ) + end if + else + minwrk = 1 + maxwrk = 1 + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n space for storing balancing factors) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_qggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_qorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_qlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_qgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_qhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 50 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: need 4*n+16 ) + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_qtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_qggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_qggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_qlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_qlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 50 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_qgges + + !> DGGES3: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> DGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_qgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alphar, & + alphai, beta, vsl, ldvsl,vsr, ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_q) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_qggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_qorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_qlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_qgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk,ierr ) + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_qlaqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 50 + end if + ! sort eigenvalues alpha/beta if desired + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_qtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_qggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_qggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_qlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_qlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 50 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_qgges3 + + !> DGGESX: computes for a pair of N-by-N real nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the real Schur form (S,T), and, + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_qggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, & + alphar, alphai, beta, vsl, ldvsl,vsr, ldvsr, rconde, rcondv, work, lwork, iwork,liwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), rconde(2), rcondv(2), vsl(& + ldvsl,*), vsr(ldvsr,*), work(*) + ! Function Arguments + procedure(stdlib_selctg_q) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, wantsb, & + wantse, wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, ip, iright, & + irows, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, safmax, safmin, & + smlnum + ! Local Arrays + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'DGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'DORGQR', ' ', n, 1, n, & + -1 ) ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 6 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n for permutation parameters) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_qggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_qorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_qlaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_qgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_qhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 60 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + ! (workspace: if ijob >= 1, need max( 8*(n+1), 2*sdim*(n-sdim) ) + ! otherwise, need 8*(n+1) ) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + call stdlib_qtgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,sdim, pl, pr, dif, work( iwrk ), lwork-iwrk+1,iwork, & + liwork, ierr ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-22 ) then + ! not enough real workspace + info = -22 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_qggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_qggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) ) then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_qlascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_qlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 60 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_qggesx + + !> DGGEV: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_qggev( jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai,beta, vl, ldvl, vr, & + ldvr, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, maxwrk, minwrk + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (workspace: need 6*n) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_qggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_qorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_qlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_qgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_qgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_qhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + ! (workspace: need 6*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_qtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_qggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) DGGEV3: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_qggev3( jobvl, jobvr, n, a, lda, b, ldb, alphar,alphai, beta, vl, ldvl, vr,& + ldvr, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_qggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_qorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_qlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_qgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk, ierr ) + else + call stdlib_qgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_qlaqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_qtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_qggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) DGGEVX: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_qggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, ilo,ihi, lscale, rscale, abnrm, bbnrm, rconde,rcondv, work, lwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(qp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), lscale(*), rconde(*), rcondv(*)& + , rscale(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, pair, wantsb, wantse, & + wantsn, wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk, mm + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc,'S' ) .or. & + stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) )then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_qlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_qlange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_qlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_qggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,work, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_qlange( '1', n, n, a, lda, work( 1 ) ) + if( ilascl ) then + work( 1 ) = abnrm + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, 1, 1, work( 1 ), 1,ierr ) + abnrm = work( 1 ) + end if + bbnrm = stdlib_qlange( '1', n, n, b, ldb, work( 1 ) ) + if( ilbscl ) then + work( 1 ) = bbnrm + call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, work( 1 ), 1,ierr ) + bbnrm = work( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_qgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to a + ! (workspace: need n, prefer n*nb) + call stdlib_qormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_qlaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_qlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_qorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_qlaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_qgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_qgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_qhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, work,lwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 130 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! (workspace: stdlib_qtgevc: need 6*n + ! stdlib_qtgsna: need 2*n*(n+2)+16 if sense = 'v' or 'b', + ! need n otherwise ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_qtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 130 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_qtgevc) and estimate condition + ! numbers (stdlib_qtgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to recalculate + ! eigenvectors and estimate one condition numbers at a time. + pair = .false. + loop_20: do i = 1, n + if( pair ) then + pair = .false. + cycle loop_20 + end if + mm = 1 + if( i DGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_qggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + real(qp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_qtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_qcopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = zero + end do + ! update d1 = d1 - t12*y2 + call stdlib_qgemv( 'NO TRANSPOSE', m, n-m, -one, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1, & + one, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_qtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_qcopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**t *y + call stdlib_qormrq( 'LEFT', 'TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), ldb, work( & + m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_qggglm + + !> DGGHD3: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then DGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of DGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_qgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(qp) :: c, c1, c2, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = real( lwkopt,KIND=qp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_qlaset( 'LOWER', n-1, n-1, zero, zero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = one + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_qlaset( 'ALL', nblst, nblst, zero, one, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_qlaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_qlartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = c + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + c = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = c*temp - s*b( i-1, jj ) + b( i-1, jj ) = s*temp + c*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + call stdlib_qrot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, a( & + j+1+i, j ),-b( j+1+i, j ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated orthogonal + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_qgemv( 'TRANSPOSE', nblst, len, one, work,nblst, a( jrow, j+1 )& + , 1, zero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_qtrmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_qgemv( 'TRANSPOSE', len, nblst-len, one,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, one,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated orthogonal + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_qtrmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_qtrmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_qgemv( 'TRANSPOSE', nnb, len, one,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,one, work( pw ), 1 ) + call stdlib_qgemv( 'TRANSPOSE', len, nnb, one,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, one,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated orthogonal matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_qgemm( 'TRANSPOSE', 'NO TRANSPOSE', nblst,cola, nblst, one, work, & + nblst,a( j, jcol+nnb ), lda, zero, work( pw ),nblst ) + call stdlib_qlacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_qorm22( 'LEFT', 'TRANSPOSE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_qgemm( 'TRANSPOSE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, one, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, zero, work( pw ),2*nnb ) + call stdlib_qlacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated orthogonal matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, q( & + topq, j ), ldq,work, nblst, zero, work( pw ), nh ) + call stdlib_qlacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_qorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + q( topq, j ), ldq,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_qlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_qlaset( 'ALL', nblst, nblst, zero, one, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_qlaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_qlaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,a( jcol + 2, & + jcol ), lda ) + call stdlib_qlaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated orthogonal matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, a( & + 1, j ), lda,work, nblst, zero, work( pw ), top ) + call stdlib_qlacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_qorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, a( 1, j ), lda,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_qlacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, b( & + 1, j ), ldb,work, nblst, zero, work( pw ), top ) + call stdlib_qlacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_qorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, b( 1, j ), ldb,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_qlacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated orthogonal matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, z( & + topq, j ), ldz,work, nblst, zero, work( pw ), nh ) + call stdlib_qlacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_qorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + z( topq, j ), ldz,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_qlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol DGGHRD: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then DGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_qgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(qp) :: c, s, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi DGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_qgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + real(qp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_qtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_qcopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_qgemv( 'NO TRANSPOSE', n-p, p, -one, a( 1, n-p+1 ), lda,d, 1, one, c, 1 & + ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_qtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_qcopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_qgemv( 'NO TRANSPOSE', nr, n-m, -one, a( n-p+1, m+1 ),lda, d( & + nr+1 ), 1, one, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_qtrmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_qaxpy( nr, -one, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**t*x + call stdlib_qormrq( 'LEFT', 'TRANSPOSE', n, 1, p, b, ldb, work( 1 ), x,n, work( p+mn+1 & + ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_qgglse + + !> DGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**T*(inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_qggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'DGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'DGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'DORMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda DGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**T + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_qggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'DGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'DGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'DORMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DGSVJ0: is called from DGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_qgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(qp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) + real(qp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(qp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(qp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,real,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_qnrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_qnrm2 is available, the if-then-else + ! below should read "aapp = stdlib_qnrm2( m, a(1,p), 1 ) * d(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_qnrm2( m, a( 1, p ), 1 )*d( p ) + else + temp1 = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*d( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( d( q )>=one ) then + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_qaxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( d( p )>=d( q ) ) then + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_qaxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_qcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, one, m,1, work, lda, & + ierr ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_qaxpy( m, temp1, work, 1,a( 1, q ), 1 ) + call stdlib_qlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_qlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ........................................................ + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! -#- m x 2 jacobi svd -#- + ! -#- safe gram matrix computation -#- + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_qaxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_qaxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_qcopy( m, a( 1, p ), 1, work,1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_qaxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_qcopy( m, a( 1, q ), 1, work,1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_qaxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_qlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_qnrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of iterations. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given tolerance, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d. + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_qswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_qswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_qgsvj0 + + !> DGSVJ1: is called from DGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> DGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_qgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) + real(qp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(qp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & + mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & + thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(qp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,real,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! Safe Gram Matrix Computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_qdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_qcopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_qdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs(aqoap-apoaq) / aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_qrotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_qrotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_qaxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_qaxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_qaxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_qaxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_qaxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_qaxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_qaxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_qaxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_qaxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_qaxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_qcopy( m, a( 1, p ), 1, work,1 ) + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_qaxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_qcopy( m, a( 1, q ), 1, work,1 ) + call stdlib_qlascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_qlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_qaxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_qlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qnrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_qlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qnrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + ! if ( notrot >= emptsw ) go to 2011 + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapp= emptsw ) go to 2011 + end if + end do loop_2100 + ! end of the p-loop + end do loop_2010 + ! end of the jbc-loop + 2011 continue + ! 2011 bailed out of the jbc-loop + do p = igl, min( igl+kbl-1, n ) + sva( p ) = abs( sva( p ) ) + end do + ! ** if ( notrot >= emptsw ) go to 1994 + end do loop_2000 + ! 2000 :: end of the ibr-loop + ! .. update sva(n) + if( ( sva( n )rootsfmin ) )then + sva( n ) = stdlib_qnrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_qlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of sweeps. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given threshold, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_qswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_qswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_qgsvj1 + + !> DGTCON: estimates the reciprocal of the condition number of a real + !> tridiagonal matrix A using the LU factorization as computed by + !> DGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_qgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm DGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_qgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + real(qp), intent(out) :: berr(*), ferr(*), work(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_qgttrs( trans, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, info ) + + call stdlib_qaxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qgttrs( transt, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qgttrs( transn, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_qgtrfs + + !> DGTSV: solves the equation + !> A*X = B, + !> where A is an n by n tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T*X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_qgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(qp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end do loop_10 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end if + if( d( n )==zero ) then + info = n + return + end if + else + loop_40: do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end do loop_40 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end if + if( d( n )==zero ) then + info = n + return + end if + end if + ! back solve with the matrix u from the factorization. + if( nrhs<=2 ) then + j = 1 + 70 continue + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + return + end subroutine stdlib_qgtsv + + !> DGTSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B or A**T * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_qgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + real(qp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + real(qp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_qcopy( n-1, dl, 1, dlf, 1 ) + call stdlib_qcopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_qgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_qlangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_qgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,iwork, info ) + + ! compute the solution vectors x. + call stdlib_qlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_qgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_qgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DGTTRF: computes an LU factorization of a real tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_qgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: d(*), dl(*), du(*) + real(qp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( d( i )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_qgttrf + + !> DGTTRS: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by DGTTRF. + + pure subroutine stdlib_qgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_qgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_qgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_qgttrs + + !> DGTTS2: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by DGTTRF. + + pure subroutine stdlib_qgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ip, j + real(qp) :: temp + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + ip = ipiv( i ) + temp = b( i+1-ip+i, j ) - dl( i )*b( ip, j ) + b( i, j ) = b( ip, j ) + b( i+1, j ) = temp + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else + ! solve a**t * x = b. + if( nrhs<=1 ) then + ! solve u**t*x = b. + j = 1 + 70 continue + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t*x = b. + do i = n - 1, 1, -1 + ip = ipiv( i ) + temp = b( i, j ) - dl( i )*b( i+1, j ) + b( i, j ) = b( ip, j ) + b( ip, j ) = temp + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_qgtts2 + + !> DHGEQZ: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by DGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_qhgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alphar, alphai, & + beta, q, ldq, z, ldz, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), work(*) + real(qp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: safety = 1.0e+2_qp + ! $ safety = one ) + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilpivt, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(qp) :: a11, a12, a1i, a1r, a21, a22, a2i, a2r, ad11, ad11l, ad12, ad12l, ad21, & + ad21l, ad22, ad22l, ad32l, an, anorm, ascale, atol, b11, b1a, b1i, b1r, b22, b2a, b2i, & + b2r, bn, bnorm, bscale, btol, c, c11i, c11r, c12, c21, c22i, c22r, cl, cq, cr, cz, & + eshift, s, s1, s1inv, s2, safmax, safmin, scale, sl, sqi, sqr, sr, szi, szr, t1, tau, & + temp, temp2, tempi, tempr, u1, u12, u12l, u2, ulp, vs, w11, w12, w21, w22, wabs, wi, & + wr, wr2 + ! Local Arrays + real(qp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 80 + else + ifirst = jch + 1 + go to 110 + end if + end if + t( jch+1, jch+1 ) = zero + end do + go to 70 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + temp = t( jch, jch+1 ) + call stdlib_qlartg( temp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = zero + if( jchilast )ifrstm = ilo + end if + go to 350 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 110 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute single shifts. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10==iiter ) then + ! exceptional shift. chosen for no particularly good reason. + ! (single shift only.) + if( ( real( maxit,KIND=qp)*safmin )*abs( h( ilast, ilast-1 ) ) abs( (wr2/s2)*t( & + ilast, ilast )- h( ilast, ilast ) ) ) then + temp = wr + wr = wr2 + wr2 = temp + temp = s1 + s1 = s2 + s2 = temp + end if + temp = max( s1, safmin*max( one, abs( wr ), abs( wi ) ) ) + if( wi/=zero )go to 200 + end if + ! fiddle with shift to avoid overflow + temp = min( ascale, one )*( half*safmax ) + if( s1>temp ) then + scale = temp / s1 + else + scale = one + end if + temp = min( bscale, one )*( half*safmax ) + if( abs( wr )>temp )scale = min( scale, temp / abs( wr ) ) + s1 = scale*s1 + wr = scale*wr + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + temp = abs( s1*h( j, j-1 ) ) + temp2 = abs( s1*h( j, j )-wr*t( j, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + temp = h( j, j-1 ) + call stdlib_qlartg( temp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = zero + end if + do jc = j, ilastm + temp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -s*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = temp + temp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -s*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = temp2 + end do + if( ilq ) then + do jr = 1, n + temp = c*q( jr, j ) + s*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = temp + end do + end if + temp = t( j+1, j+1 ) + call stdlib_qlartg( temp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = zero + do jr = ifrstm, min( j+2, ilast ) + temp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -s*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = temp + end do + do jr = ifrstm, j + temp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -s*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = temp + end do + if( ilz ) then + do jr = 1, n + temp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -s*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = temp + end do + end if + end do loop_190 + go to 350 + ! use francis double-shift + ! note: the francis double-shift should work with real shifts, + ! but only if the block is at least 3x3. + ! this code may break if this point is reached with + ! a 2x2 block with real eigenvalues. + 200 continue + if( ifirst+1==ilast ) then + ! special case -- 2x2 block with complex eigenvectors + ! step 1: standardize, that is, rotate so that + ! ( b11 0 ) + ! b = ( ) with b11 non-negative. + ! ( 0 b22 ) + call stdlib_qlasv2( t( ilast-1, ilast-1 ), t( ilast-1, ilast ),t( ilast, ilast ),& + b22, b11, sr, cr, sl, cl ) + if( b11 unfl ) + ! __ + ! (sa - wb) ( cz -sz ) + ! ( sz cz ) + c11r = s1*a11 - wr*b11 + c11i = -wi*b11 + c12 = s1*a12 + c21 = s1*a21 + c22r = s1*a22 - wr*b22 + c22i = -wi*b22 + if( abs( c11r )+abs( c11i )+abs( c12 )>abs( c21 )+abs( c22r )+abs( c22i ) ) & + then + t1 = stdlib_qlapy3( c12, c11r, c11i ) + cz = c12 / t1 + szr = -c11r / t1 + szi = -c11i / t1 + else + cz = stdlib_qlapy2( c22r, c22i ) + if( cz<=safmin ) then + cz = zero + szr = one + szi = zero + else + tempr = c22r / cz + tempi = c22i / cz + t1 = stdlib_qlapy2( cz, c21 ) + cz = cz / t1 + szr = -c21*tempr / t1 + szi = c21*tempi / t1 + end if + end if + ! compute givens rotation on left + ! ( cq sq ) + ! ( __ ) a or b + ! ( -sq cq ) + an = abs( a11 ) + abs( a12 ) + abs( a21 ) + abs( a22 ) + bn = abs( b11 ) + abs( b22 ) + wabs = abs( wr ) + abs( wi ) + if( s1*an>wabs*bn ) then + cq = cz*b11 + sqr = szr*b22 + sqi = -szi*b22 + else + a1r = cz*a11 + szr*a12 + a1i = szi*a12 + a2r = cz*a21 + szr*a22 + a2i = szi*a22 + cq = stdlib_qlapy2( a1r, a1i ) + if( cq<=safmin ) then + cq = zero + sqr = one + sqi = zero + else + tempr = a1r / cq + tempi = a1i / cq + sqr = tempr*a2r + tempi*a2i + sqi = tempi*a2r - tempr*a2i + end if + end if + t1 = stdlib_qlapy3( cq, sqr, sqi ) + cq = cq / t1 + sqr = sqr / t1 + sqi = sqi / t1 + ! compute diagonal elements of qbz + tempr = sqr*szr - sqi*szi + tempi = sqr*szi + sqi*szr + b1r = cq*cz*b11 + tempr*b22 + b1i = tempi*b22 + b1a = stdlib_qlapy2( b1r, b1i ) + b2r = cq*cz*b22 + tempr*b11 + b2i = -tempi*b11 + b2a = stdlib_qlapy2( b2r, b2i ) + ! normalize so beta > 0, and im( alpha1 ) > 0 + beta( ilast-1 ) = b1a + beta( ilast ) = b2a + alphar( ilast-1 ) = ( wr*b1a )*s1inv + alphai( ilast-1 ) = ( wi*b1a )*s1inv + alphar( ilast ) = ( wr*b2a )*s1inv + alphai( ilast ) = -( wi*b2a )*s1inv + ! step 3: go to next block -- exit if finished. + ilast = ifirst - 1 + if( ilastilast )ifrstm = ilo + end if + go to 350 + else + ! usual case: 3x3 or larger block, using francis implicit + ! double-shift + ! 2 + ! eigenvalue equation is w - c w + d = 0, + ! -1 2 -1 + ! so compute 1st column of (a b ) - c a b + d + ! using the formula in qzit (from eispack) + ! we assume that the block is at least 3x3 + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + u12 = t( ilast-1, ilast ) / t( ilast, ilast ) + ad11l = ( ascale*h( ifirst, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad21l = ( ascale*h( ifirst+1, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad12l = ( ascale*h( ifirst, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad22l = ( ascale*h( ifirst+1, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad32l = ( ascale*h( ifirst+2, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + u12l = t( ifirst, ifirst+1 ) / t( ifirst+1, ifirst+1 ) + v( 1 ) = ( ad11-ad11l )*( ad22-ad11l ) - ad12*ad21 +ad21*u12*ad11l + ( ad12l-& + ad11l*u12l )*ad21l + v( 2 ) = ( ( ad22l-ad11l )-ad21l*u12l-( ad11-ad11l )-( ad22-ad11l )+ad21*u12 )& + *ad21l + v( 3 ) = ad32l*ad21l + istart = ifirst + call stdlib_qlarfg( 3, v( 1 ), v( 2 ), 1, tau ) + v( 1 ) = one + ! sweep + loop_290: do j = istart, ilast - 2 + ! all but last elements: use 3x3 householder transforms. + ! zero (j-1)st column of a + if( j>istart ) then + v( 1 ) = h( j, j-1 ) + v( 2 ) = h( j+1, j-1 ) + v( 3 ) = h( j+2, j-1 ) + call stdlib_qlarfg( 3, h( j, j-1 ), v( 2 ), 1, tau ) + v( 1 ) = one + h( j+1, j-1 ) = zero + h( j+2, j-1 ) = zero + end if + do jc = j, ilastm + temp = tau*( h( j, jc )+v( 2 )*h( j+1, jc )+v( 3 )*h( j+2, jc ) ) + h( j, jc ) = h( j, jc ) - temp + h( j+1, jc ) = h( j+1, jc ) - temp*v( 2 ) + h( j+2, jc ) = h( j+2, jc ) - temp*v( 3 ) + temp2 = tau*( t( j, jc )+v( 2 )*t( j+1, jc )+v( 3 )*t( j+2, jc ) ) + t( j, jc ) = t( j, jc ) - temp2 + t( j+1, jc ) = t( j+1, jc ) - temp2*v( 2 ) + t( j+2, jc ) = t( j+2, jc ) - temp2*v( 3 ) + end do + if( ilq ) then + do jr = 1, n + temp = tau*( q( jr, j )+v( 2 )*q( jr, j+1 )+v( 3 )*q( jr, j+2 ) ) + + q( jr, j ) = q( jr, j ) - temp + q( jr, j+1 ) = q( jr, j+1 ) - temp*v( 2 ) + q( jr, j+2 ) = q( jr, j+2 ) - temp*v( 3 ) + end do + end if + ! zero j-th column of b (see dlagbc for details) + ! swap rows to pivot + ilpivt = .false. + temp = max( abs( t( j+1, j+1 ) ), abs( t( j+1, j+2 ) ) ) + temp2 = max( abs( t( j+2, j+1 ) ), abs( t( j+2, j+2 ) ) ) + if( max( temp, temp2 )=temp2 ) then + w11 = t( j+1, j+1 ) + w21 = t( j+2, j+1 ) + w12 = t( j+1, j+2 ) + w22 = t( j+2, j+2 ) + u1 = t( j+1, j ) + u2 = t( j+2, j ) + else + w21 = t( j+1, j+1 ) + w11 = t( j+2, j+1 ) + w22 = t( j+1, j+2 ) + w12 = t( j+2, j+2 ) + u2 = t( j+1, j ) + u1 = t( j+2, j ) + end if + ! swap columns if nec. + if( abs( w12 )>abs( w11 ) ) then + ilpivt = .true. + temp = w12 + temp2 = w22 + w12 = w11 + w22 = w21 + w11 = temp + w21 = temp2 + end if + ! lu-factor + temp = w21 / w11 + u2 = u2 - temp*u1 + w22 = w22 - temp*w12 + w21 = zero + ! compute scale + scale = one + if( abs( w22 ) DHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a real upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_qhsein( side, eigsrc, initv, select, n, h, ldh, wr, wi,vl, ldvl, vr, ldvr, & + mm, m, work, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(qp), intent(in) :: h(ldh,*), wi(*) + real(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), wr(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, pair, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ksi, ksr, ldwork + real(qp) :: bignum, eps3, hnorm, smlnum, ulp, unfl, wki, wkr + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors, and standardize the array select. + m = 0 + pair = .false. + do k = 1, n + if( pair ) then + pair = .false. + select( k ) = .false. + else + if( wi( k )==zero ) then + if( select( k ) )m = m + 1 + else + pair = .true. + if( select( k ) .or. select( k+1 ) ) then + select( k ) = .true. + m = m + 2 + end if + end if + end if + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==zero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_qlanhs( 'I', kr-kl+1, h( kl, kl ), ldh, work ) + if( stdlib_qisnan( hnorm ) ) then + info = -6 + return + else if( hnorm>zero ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wkr = wr( k ) + wki = wi( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. abs( wr( i )-wkr )+abs( wi( i )-wki )0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifaill( ksr ) = k + ifaill( ksi ) = k + else + ifaill( ksr ) = 0 + ifaill( ksi ) = 0 + end if + do i = 1, kl - 1 + vl( i, ksr ) = zero + end do + if( pair ) then + do i = 1, kl - 1 + vl( i, ksi ) = zero + end do + end if + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_qlaein( .true., noinit, kr, h, ldh, wkr, wki,vr( 1, ksr ), vr( 1, & + ksi ), work, ldwork,work( n*n+n+1 ), eps3, smlnum, bignum,iinfo ) + if( iinfo>0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifailr( ksr ) = k + ifailr( ksi ) = k + else + ifailr( ksr ) = 0 + ifailr( ksi ) = 0 + end if + do i = kr + 1, n + vr( i, ksr ) = zero + end do + if( pair ) then + do i = kr + 1, n + vr( i, ksi ) = zero + end do + end if + end if + if( pair ) then + ksr = ksr + 2 + else + ksr = ksr + 1 + end if + end if + end do loop_120 + return + end subroutine stdlib_qhsein + + !> DHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_qhseqr( job, compz, n, ilo, ihi, h, ldh, wr, wi, z,ldz, work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_qlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_qlahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + ! Local Arrays + real(qp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: i, kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = real( max( 1, n ),KIND=qp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldhnmin ) then + call stdlib_qlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + work, lwork, info ) + else + ! ==== small matrix ==== + call stdlib_qlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + info ) + if( info>0 ) then + ! ==== a rare stdlib_qlahqr failure! stdlib_qlaqr0 sometimes succeeds + ! . when stdlib_qlahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_qlaqr0 directly. ==== + call stdlib_qlaqr0( wantt, wantz, n, ilo, kbot, h, ldh, wr,wi, ilo, ihi, z,& + ldz, work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_qlaqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_qlaqr0. ==== + call stdlib_qlacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = zero + call stdlib_qlaset( 'A', nl, nl-n, zero, zero, hl( 1, n+1 ),nl ) + call stdlib_qlaqr0( wantt, wantz, nl, ilo, kbot, hl, nl, wr,wi, ilo, ihi, & + z, ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_qlacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_qlaset( 'L', n-2, n-2, zero, zero,& + h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = max( real( max( 1, n ),KIND=qp), work( 1 ) ) + end if + end subroutine stdlib_qhseqr + + !> DISNAN: returns .TRUE. if its argument is NaN, and .FALSE. + !> otherwise. To be replaced by the Fortran 2003 intrinsic in the + !> future. + + pure logical(lk) function stdlib_qisnan( din ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: din + ! ===================================================================== + ! Executable Statements + stdlib_qisnan = stdlib_qlaisnan(din,din) + return + end function stdlib_qisnan + + !> DLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_qla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_wero = symb_wero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_wero = symb_wero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_qla_gbamv + + !> DLA_GBRCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(qp) function stdlib_qla_gbrcond( trans, n, kl, ku, ab, ldab,afb, ldafb, ipiv, cmode, c,& + info, work, iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, ldab, ldafb, kl, ku, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: ab(ldab,*), afb(ldafb,*), c(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j, kd, ke + real(qp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_qla_gbrcond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab DLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(qp) function stdlib_qla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( abs( ab( kd+i-j, j)), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( abs( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_qla_gbrpvgrw = rpvgrw + end function stdlib_qla_gbrpvgrw + + !> DLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_qla_geamv ( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n, trans + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' )) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_qla_geamv + + !> DLA_GERCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(qp) function stdlib_qla_gercond( trans, n, a, lda, af,ldaf, ipiv, cmode, c,info, work, & + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_qla_gercond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(qp) function stdlib_qla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + real(qp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( abs( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( abs( af( i, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_qla_gerpvgrw = rpvgrw + end function stdlib_qla_gerpvgrw + + !> DLA_LIN_BERR: computes component-wise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the component-wise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_qla_lin_berr ( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(qp), intent(in) :: ayb(n,nrhs) + real(qp), intent(out) :: berr(nrhs) + real(qp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(qp) :: tmp,safe1 + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the sla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= zero) then + tmp = (safe1+abs(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_qp (and if computed by sla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_qla_lin_berr + + !> DLA_PORCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(qp) function stdlib_qla_porcond( uplo, n, a, lda, af, ldaf,cmode, c, info, work,iwork ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + real(qp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(qp), intent(out) :: work(*) + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, tmp + logical(lk) :: up + ! Array Arguments + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_qla_porcond = zero + info = 0 + if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLA_PORCOND', -info ) + return + end if + if( n==0 ) then + stdlib_qla_porcond = one + return + end if + up = .false. + if ( stdlib_lsame( uplo, 'U' ) ) up = .true. + ! compute the equilibration matrix r such that + ! inv(r)*a*c has unit 1-norm. + if ( up ) then + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( j ,i ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + else + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + endif + ! estimate the norm of inv(op(a)). + ainvnm = zero + kase = 0 + 10 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==2 ) then + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + if (up) then + call stdlib_qpotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_qpotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by inv(c). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + else + ! multiply by inv(c**t). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + if ( up ) then + call stdlib_qpotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_qpotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + end if + go to 10 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm /= zero )stdlib_qla_porcond = ( one / ainvnm ) + return + end function stdlib_qla_porcond + + !> DLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(qp) function stdlib_qla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + real(qp), intent(in) :: a(lda,*), af(ldaf,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: amax, umax, rpvgrw + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_qpotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_qla_porpvgrw = rpvgrw + end function stdlib_qla_porpvgrw + + !> DLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_qla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) ) then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_qla_syamv + + !> DLA_SYRCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(qp) function stdlib_qla_syrcond( uplo, n, a, lda, af, ldaf,ipiv, cmode, c, info, work,& + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + character :: normin + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, smlnum, tmp + logical(lk) :: up + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_qla_syrcond = zero + info = 0 + if( n<0 ) then + info = -2 + else if( lda DLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(qp) function stdlib_qla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*), af(ldaf,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(qp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_qsytrs. + ! calls to stdlib_dswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k-1 ) = max( abs( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k+1 ) = max( abs( af(i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_qla_syrpvgrw = rpvgrw + end function stdlib_qla_syrpvgrw + + !> DLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_qla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: x(*), y(*) + real(qp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + real(qp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_qla_wwaddw + + !> DLABAD: takes as input the values computed by DLAMCH for underflow and + !> overflow, and returns the square root of each of these values if the + !> log of LARGE is sufficiently large. This subroutine is intended to + !> identify machines with a large exponent range, such as the Crays, and + !> redefine the underflow and overflow limits to be the square roots of + !> the values computed by DLAMCH. This subroutine is needed because + !> DLAMCH does not compensate for poor arithmetic in the upper half of + !> the exponent range, as is found on a Cray. + + pure subroutine stdlib_qlabad( small, large ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(inout) :: large, small + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: log10,sqrt + ! Executable Statements + ! if it looks like we're on a cray, take the square root of + ! small and large to avoid overflow and underflow problems. + if( log10( large )>2000._qp ) then + small = sqrt( small ) + large = sqrt( large ) + end if + return + end subroutine stdlib_qlabad + + !> DLABRD: reduces the first NB rows and columns of a real general + !> m by n matrix A to upper or lower bidiagonal form by an orthogonal + !> transformation Q**T * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by DGEBRD + + pure subroutine stdlib_qlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*), e(*), taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_qgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, a( i, 1 ),lda, y( i, 1 ), & + ldy, one, a( i, i ), 1 ) + call stdlib_qgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, x( i, 1 ),ldx, a( 1, i ), 1,& + one, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + call stdlib_qlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + if( i DLACN2: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_qlacn2( n, v, x, isgn, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(qp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + integer(ilp), intent(inout) :: isave(3) + real(qp), intent(out) :: v(*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, jlast + real(qp) :: altsgn, estold, temp, xs + ! Intrinsic Functions + intrinsic :: abs,real,nint + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=qp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_qasum( n, x, 1 ) + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + isave( 2 ) = stdlib_iqamax( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( isave( 2 ) ) = one + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_qcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_qasum( n, v, 1 ) + do i = 1, n + if( x(i)>=zero ) then + xs = one + else + xs = -one + end if + if( nint( xs,KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_iqamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_qcopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_qlacn2 + + !> DLACON: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_qlacon( n, v, x, isgn, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(qp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + real(qp), intent(out) :: v(*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(qp) :: altsgn, estold, temp + ! Intrinsic Functions + intrinsic :: abs,real,nint,sign + ! Save Statement + save + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=qp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_qasum( n, x, 1 ) + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + j = stdlib_iqamax( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( j ) = one + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_qcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_qasum( n, v, 1 ) + do i = 1, n + if( nint( sign( one, x( i ) ),KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = j + j = stdlib_iqamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( j ) ) ) .and. ( iterest ) then + call stdlib_qcopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_qlacon + + !> DLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_qlacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_qlacpy + + !> DLADIV: performs complex division in real arithmetic + !> a + i*b + !> p + i*q = --------- + !> c + i*d + !> The algorithm is due to Michael Baudin and Robert L. Smith + !> and can be found in the paper + !> "A Robust Complex Division in Scilab" + + pure subroutine stdlib_qladiv( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: a, b, c, d + real(qp), intent(out) :: p, q + ! ===================================================================== + ! Parameters + real(qp), parameter :: bs = 2.0_qp + + + + ! Local Scalars + real(qp) :: aa, bb, cc, dd, ab, cd, s, ov, un, be, eps + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + aa = a + bb = b + cc = c + dd = d + ab = max( abs(a), abs(b) ) + cd = max( abs(c), abs(d) ) + s = one + ov = stdlib_qlamch( 'OVERFLOW THRESHOLD' ) + un = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'EPSILON' ) + be = bs / (eps*eps) + if( ab >= half*ov ) then + aa = half * aa + bb = half * bb + s = two * s + end if + if( cd >= half*ov ) then + cc = half * cc + dd = half * dd + s = half * s + end if + if( ab <= un*bs/eps ) then + aa = aa * be + bb = bb * be + s = s / be + end if + if( cd <= un*bs/eps ) then + cc = cc * be + dd = dd * be + s = s * be + end if + if( abs( d )<=abs( c ) ) then + call stdlib_qladiv1(aa, bb, cc, dd, p, q) + else + call stdlib_qladiv1(bb, aa, dd, cc, p, q) + q = -q + end if + p = p * s + q = q * s + return + end subroutine stdlib_qladiv + + + pure subroutine stdlib_qladiv1( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(inout) :: a + real(qp), intent(in) :: b, c, d + real(qp), intent(out) :: p, q + ! ===================================================================== + + ! Local Scalars + real(qp) :: r, t + ! Executable Statements + r = d / c + t = one / (c + d * r) + p = stdlib_qladiv2(a, b, c, d, r, t) + a = -a + q = stdlib_qladiv2(b, a, c, d, r, t) + return + end subroutine stdlib_qladiv1 + + + pure real(qp) function stdlib_qladiv2( a, b, c, d, r, t ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: a, b, c, d, r, t + ! ===================================================================== + + ! Local Scalars + real(qp) :: br + ! Executable Statements + if( r/=zero ) then + br = b * r + if( br/=zero ) then + stdlib_qladiv2 = (a + br) * t + else + stdlib_qladiv2 = a * t + (b * t) * r + end if + else + stdlib_qladiv2 = (a + d * (b / c)) * t + end if + return + end function stdlib_qladiv2 + + !> DLAE2: computes the eigenvalues of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, and RT2 + !> is the eigenvalue of smaller absolute value. + + pure subroutine stdlib_qlae2( a, b, c, rt1, rt2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: a, b, c + real(qp), intent(out) :: rt1, rt2 + ! ===================================================================== + + + + + ! Local Scalars + real(qp) :: ab, acmn, acmx, adf, df, rt, sm, tb + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + end if + return + end subroutine stdlib_qlae2 + + !> DLAEBZ: contains the iteration loops which compute and use the + !> function N(w), which is the count of eigenvalues of a symmetric + !> tridiagonal matrix T less than or equal to its argument w. It + !> performs a choice of two types of loops: + !> IJOB=1, followed by + !> IJOB=2: It takes as input a list of intervals and returns a list of + !> sufficiently small intervals whose union contains the same + !> eigenvalues as the union of the original intervals. + !> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. + !> The output interval (AB(j,1),AB(j,2)] will contain + !> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. + !> IJOB=3: It performs a binary search in each input interval + !> (AB(j,1),AB(j,2)] for a point w(j) such that + !> N(w(j))=NVAL(j), and uses C(j) as the starting point of + !> the search. If such a w(j) is found, then on output + !> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output + !> (AB(j,1),AB(j,2)] will be a small interval containing the + !> point where N(w) jumps through NVAL(j), unless that point + !> lies outside the initial interval. + !> Note that the intervals are in all cases half-open intervals, + !> i.e., of the form (a,b] , which includes b but not a . + !> To avoid underflow, the matrix should be scaled so that its largest + !> element is no greater than overflow**(1/2) * underflow**(1/4) + !> in absolute value. To assure the most accurate computation + !> of small eigenvalues, the matrix should be scaled to be + !> not much smaller than that, either. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966 + !> Note: the arguments are, in general, *not* checked for unreasonable + !> values. + + pure subroutine stdlib_qlaebz( ijob, nitmax, n, mmax, minp, nbmin, abstol,reltol, pivmin, d, & + e, e2, nval, ab, c, mout,nab, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, minp, mmax, n, nbmin, nitmax + integer(ilp), intent(out) :: info, mout + real(qp), intent(in) :: abstol, pivmin, reltol + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(inout) :: nab(mmax,*), nval(*) + real(qp), intent(inout) :: ab(mmax,*), c(*) + real(qp), intent(in) :: d(*), e(*), e2(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itmp1, itmp2, j, ji, jit, jp, kf, kfnew, kl, klnew + real(qp) :: tmp1, tmp2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! check for errors + info = 0 + if( ijob<1 .or. ijob>3 ) then + info = -1 + return + end if + ! initialize nab + if( ijob==1 ) then + ! compute the number of eigenvalues in the initial intervals. + mout = 0 + do ji = 1, minp + do jp = 1, 2 + tmp1 = d( 1 ) - ab( ji, jp ) + if( abs( tmp1 )=nbmin .and. nbmin>0 ) then + ! begin of parallel version of the loop + do ji = kf, kl + ! compute n(c), the number of eigenvalues less than c + work( ji ) = d( 1 ) - c( ji ) + iwork( ji ) = 0 + if( work( ji )<=pivmin ) then + iwork( ji ) = 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + do j = 2, n + work( ji ) = d( j ) - e2( j-1 ) / work( ji ) - c( ji ) + if( work( ji )<=pivmin ) then + iwork( ji ) = iwork( ji ) + 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + end do + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + klnew = kl + loop_70: do ji = kf, kl + ! insure that n(w) is monotone + iwork( ji ) = min( nab( ji, 2 ),max( nab( ji, 1 ), iwork( ji ) ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( iwork( ji )==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = c( ji ) + else if( iwork( ji )==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = c( ji ) + else + klnew = klnew + 1 + if( klnew<=mmax ) then + ! eigenvalue in both intervals -- add upper to + ! queue. + ab( klnew, 2 ) = ab( ji, 2 ) + nab( klnew, 2 ) = nab( ji, 2 ) + ab( klnew, 1 ) = c( ji ) + nab( klnew, 1 ) = iwork( ji ) + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + else + info = mmax + 1 + end if + end if + end do loop_70 + if( info/=0 )return + kl = klnew + else + ! ijob=3: binary search. keep only the interval containing + ! w s.t. n(w) = nval + do ji = kf, kl + if( iwork( ji )<=nval( ji ) ) then + ab( ji, 1 ) = c( ji ) + nab( ji, 1 ) = iwork( ji ) + end if + if( iwork( ji )>=nval( ji ) ) then + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + end if + end do + end if + else + ! end of parallel version of the loop + ! begin of serial version of the loop + klnew = kl + loop_100: do ji = kf, kl + ! compute n(w), the number of eigenvalues less than w + tmp1 = c( ji ) + tmp2 = d( 1 ) - tmp1 + itmp1 = 0 + if( tmp2<=pivmin ) then + itmp1 = 1 + tmp2 = min( tmp2, -pivmin ) + end if + do j = 2, n + tmp2 = d( j ) - e2( j-1 ) / tmp2 - tmp1 + if( tmp2<=pivmin ) then + itmp1 = itmp1 + 1 + tmp2 = min( tmp2, -pivmin ) + end if + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + ! insure that n(w) is monotone + itmp1 = min( nab( ji, 2 ),max( nab( ji, 1 ), itmp1 ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( itmp1==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = tmp1 + else if( itmp1==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = tmp1 + else if( klnew=nval( ji ) ) then + ab( ji, 2 ) = tmp1 + nab( ji, 2 ) = itmp1 + end if + end if + end do loop_100 + kl = klnew + end if + ! check for convergence + kfnew = kf + loop_110: do ji = kf, kl + tmp1 = abs( ab( ji, 2 )-ab( ji, 1 ) ) + tmp2 = max( abs( ab( ji, 2 ) ), abs( ab( ji, 1 ) ) ) + if( tmp1=nab( ji, 2 ) ) & + then + ! converged -- swap with position kfnew, + ! then increment kfnew + if( ji>kfnew ) then + tmp1 = ab( ji, 1 ) + tmp2 = ab( ji, 2 ) + itmp1 = nab( ji, 1 ) + itmp2 = nab( ji, 2 ) + ab( ji, 1 ) = ab( kfnew, 1 ) + ab( ji, 2 ) = ab( kfnew, 2 ) + nab( ji, 1 ) = nab( kfnew, 1 ) + nab( ji, 2 ) = nab( kfnew, 2 ) + ab( kfnew, 1 ) = tmp1 + ab( kfnew, 2 ) = tmp2 + nab( kfnew, 1 ) = itmp1 + nab( kfnew, 2 ) = itmp2 + if( ijob==3 ) then + itmp1 = nval( ji ) + nval( ji ) = nval( kfnew ) + nval( kfnew ) = itmp1 + end if + end if + kfnew = kfnew + 1 + end if + end do loop_110 + kf = kfnew + ! choose midpoints + do ji = kf, kl + c( ji ) = half*( ab( ji, 1 )+ab( ji, 2 ) ) + end do + ! if no more intervals to refine, quit. + if( kf>kl )go to 140 + end do loop_130 + ! converged + 140 continue + info = max( kl+1-kf, 0 ) + mout = kl + return + end subroutine stdlib_qlaebz + + !> DLAED0: computes all eigenvalues and corresponding eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + + pure subroutine stdlib_qlaed0( icompq, qsiz, n, d, e, q, ldq, qstore, ldqs,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldq, ldqs, n, qsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*), q(ldq,*) + real(qp), intent(out) :: qstore(ldqs,*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, subpbs, & + tlvls + real(qp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>2 ) then + info = -1 + else if( ( icompq==1 ) .and. ( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + if( icompq/=2 ) then + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=qp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + loop_90: do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. + ! stdlib_qlaed1 is used only for the full eigensystem of a tridiagonal + ! matrix. + ! stdlib_qlaed7 handles the cases in which eigenvalues only or eigenvalues + ! and eigenvectors of a full symmetric matrix (which was reduced to + ! tridiagonal form) are desired. + if( icompq==2 ) then + call stdlib_qlaed1( matsiz, d( submat ), q( submat, submat ),ldq, iwork( & + indxq+submat ),e( submat+msd2-1 ), msd2, work,iwork( subpbs+1 ), info ) + + else + call stdlib_qlaed7( icompq, matsiz, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,iwork( indxq+submat ), e( submat+msd2-1 ),msd2, & + work( iq ), iwork( iqptr ),iwork( iprmpt ), iwork( iperm ),iwork( igivpt ), & + iwork( igivcl ),work( igivnm ), work( iwrem ),iwork( subpbs+1 ), info ) + + end if + if( info/=0 )go to 130 + iwork( i / 2+1 ) = iwork( i+2 ) + end do loop_90 + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + if( icompq==1 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_qcopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_qcopy( n, work, 1, d, 1 ) + else if( icompq==2 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_qcopy( n, q( 1, j ), 1, work( n*i+1 ), 1 ) + end do + call stdlib_qcopy( n, work, 1, d, 1 ) + call stdlib_qlacpy( 'A', n, n, work( n+1 ), n, q, ldq ) + else + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + end do + call stdlib_qcopy( n, work, 1, d, 1 ) + end if + go to 140 + 130 continue + info = submat*( n+1 ) + submat + matsiz - 1 + 140 continue + return + end subroutine stdlib_qlaed0 + + !> DLAED1: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + !> the case in which eigenvalues only or eigenvalues and eigenvectors + !> of a full symmetric matrix (which was reduced to tridiagonal form) + !> are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**T*u, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by DLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_qlaed1( n, d, q, ldq, indxq, rho, cutpnt, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, n + integer(ilp), intent(out) :: info + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: indxq(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), q(ldq,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, n1, n2, & + zpp1 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ldqcutpnt .or. ( n / 2 ) DLAED2: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_qlaed2( k, n, n1, d, q, ldq, indxq, rho, z, dlamda, w,q2, indx, indxc,& + indxp, coltyp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldq, n, n1 + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), indx(*), indxc(*), indxp(*) + integer(ilp), intent(inout) :: indxq(*) + real(qp), intent(inout) :: d(*), q(ldq,*), z(*) + real(qp), intent(out) :: dlamda(*), q2(*), w(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: mone = -1.0_qp + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, imax, iq1, iq2, j, jmax, js, k2, n1p1, n2, nj, pj + real(qp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( ldqn1 .or. ( n / 2 )n )go to 100 + if( rho*abs( z( nj ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + coltyp( nj ) = 4 + indxp( k2 ) = nj + else + ! check if eigenvalues are close enough to allow deflation. + s = z( pj ) + c = z( nj ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_qlapy2( c, s ) + t = d( nj ) - d( pj ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( nj ) = tau + z( pj ) = zero + if( coltyp( nj )/=coltyp( pj ) )coltyp( nj ) = 2 + coltyp( pj ) = 4 + call stdlib_qrot( n, q( 1, pj ), 1, q( 1, nj ), 1, c, s ) + t = d( pj )*c**2 + d( nj )*s**2 + d( nj ) = d( pj )*s**2 + d( nj )*c**2 + d( pj ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( pj ) DLAED3: finds the roots of the secular equation, as defined by the + !> values in D, W, and RHO, between 1 and K. It makes the + !> appropriate calls to DLAED4 and then updates the eigenvectors by + !> multiplying the matrix of eigenvectors of the pair of eigensystems + !> being combined by the matrix of eigenvectors of the K-by-K system + !> which is solved here. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_qlaed3( k, n, n1, d, q, ldq, rho, dlamda, q2, indx,ctot, w, s, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, n, n1 + real(qp), intent(in) :: rho + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), indx(*) + real(qp), intent(out) :: d(*), q(ldq,*), s(*) + real(qp), intent(inout) :: dlamda(*), w(*) + real(qp), intent(in) :: q2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, iq2, j, n12, n2, n23 + real(qp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( n This subroutine computes the I-th updated eigenvalue of a symmetric + !> rank-one modification to a diagonal matrix whose elements are + !> given in the array d, and that + !> D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_qlaed4( n, i, d, z, delta, rho, dlam, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(qp), intent(out) :: dlam + real(qp), intent(in) :: rho + ! Array Arguments + real(qp), intent(in) :: d(*), z(*) + real(qp), intent(out) :: delta(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3 + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(qp) :: a, b, c, del, dltlb, dltub, dphi, dpsi, dw, eps, erretm, eta, midpt, phi, & + prew, psi, rhoinv, tau, temp, temp1, w + ! Local Arrays + real(qp) :: zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + dlam = d( 1 ) + rho*z( 1 )*z( 1 ) + delta( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_qlaed5( i, d, z, delta, rho, dlam ) + return + end if + ! compute machine epsilon + eps = stdlib_qlamch( 'EPSILON' ) + rhoinv = one / rho + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + midpt = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + do j = 1, n + delta( j ) = ( d( j )-d( i ) ) - midpt + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / delta( j ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / delta( ii ) +z( n )*z( n ) / delta( n ) + if( w<=zero ) then + temp = z( n-1 )*z( n-1 ) / ( d( n )-d( n-1 )+rho ) +z( n )*z( n ) / rho + if( c<=temp ) then + tau = rho + else + del = d( n ) - d( n-1 ) + a = -c*del + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*del + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. temp=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. tempzero ) then + ! d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + ! we choose d(i) as origin. + orgati = .true. + a = c*del + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*del + if( a>zero ) then + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + dltlb = zero + dltub = midpt + else + ! (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + ! we choose d(i+1) as origin. + orgati = .false. + a = c*del - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*del + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / delta( ii ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv +three*abs( temp ) + abs( tau )& + *dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + if( orgati ) then + c = w - delta( ip1 )*dw - ( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + delta( i )*delta( i )*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )*temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + zz( 2 ) = z( ii )*z( ii ) + call stdlib_qlaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempabs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + tau = tau + eta + ! main loop to update the values of the array delta + iter = niter + 1 + loop_240: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + if( .not.swtch ) then + if( orgati ) then + c = w - delta( ip1 )*dw -( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + else + temp = z( ii ) / delta( ii ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - delta( i )*dpsi - delta( ip1 )*dphi + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + + else + a = z( ip1 )*z( ip1 ) +delta( i )*delta( i )*( dpsi+dphi ) + end if + else + a = delta( i )*delta( i )*dpsi +delta( ip1 )*delta( ip1 )& + *dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - delta( iim1 )*dpsi - delta( iip1 )*dphi + zz( 1 ) = delta( iim1 )*delta( iim1 )*dpsi + zz( 3 ) = delta( iip1 )*delta( iip1 )*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )& + *temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )& + *temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + end if + call stdlib_qlaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempzero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_240 + ! return with info = 1, niter = maxit and not converged + info = 1 + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + end if + 250 continue + return + end subroutine stdlib_qlaed4 + + !> This subroutine computes the I-th eigenvalue of a symmetric rank-one + !> modification of a 2-by-2 diagonal matrix + !> diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal elements in the array D are assumed to satisfy + !> D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_qlaed5( i, d, z, delta, rho, dlam ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(qp), intent(out) :: dlam + real(qp), intent(in) :: rho + ! Array Arguments + real(qp), intent(in) :: d(2), z(2) + real(qp), intent(out) :: delta(2) + ! ===================================================================== + + ! Local Scalars + real(qp) :: b, c, del, tau, temp, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + if( i==1 ) then + w = one + two*rho*( z( 2 )*z( 2 )-z( 1 )*z( 1 ) ) / del + if( w>zero ) then + b = del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*del + ! b > zero, always + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + dlam = d( 1 ) + tau + delta( 1 ) = -z( 1 ) / tau + delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + end if + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_qlaed5 + + !> DLAED6: computes the positive or negative root (closest to the origin) + !> of + !> z(1) z(2) z(3) + !> f(x) = rho + --------- + ---------- + --------- + !> d(1)-x d(2)-x d(3)-x + !> It is assumed that + !> if ORGATI = .true. the root is between d(2) and d(3); + !> otherwise it is between d(1) and d(2) + !> This routine will be called by DLAED4 when necessary. In most cases, + !> the root sought is the smallest in magnitude, though it might not be + !> in some extremely rare situations. + + pure subroutine stdlib_qlaed6( kniter, orgati, rho, d, z, finit, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: orgati + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kniter + real(qp), intent(in) :: finit, rho + real(qp), intent(out) :: tau + ! Array Arguments + real(qp), intent(in) :: d(3), z(3) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + + + ! Local Arrays + real(qp) :: dscale(3), zscale(3) + ! Local Scalars + logical(lk) :: scale + integer(ilp) :: i, iter, niter + real(qp) :: a, b, base, c, ddf, df, eps, erretm, eta, f, fc, sclfac, sclinv, small1, & + small2, sminv1, sminv2, temp, temp1, temp2, temp3, temp4, lbd, ubd + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + info = 0 + if( orgati ) then + lbd = d(2) + ubd = d(3) + else + lbd = d(1) + ubd = d(2) + end if + if( finit < zero )then + lbd = zero + else + ubd = zero + end if + niter = 1 + tau = zero + if( kniter==2 ) then + if( orgati ) then + temp = ( d( 3 )-d( 2 ) ) / two + c = rho + z( 1 ) / ( ( d( 1 )-d( 2 ) )-temp ) + a = c*( d( 2 )+d( 3 ) ) + z( 2 ) + z( 3 ) + b = c*d( 2 )*d( 3 ) + z( 2 )*d( 3 ) + z( 3 )*d( 2 ) + else + temp = ( d( 1 )-d( 2 ) ) / two + c = rho + z( 3 ) / ( ( d( 3 )-d( 2 ) )-temp ) + a = c*( d( 1 )+d( 2 ) ) + z( 1 ) + z( 2 ) + b = c*d( 1 )*d( 2 ) + z( 1 )*d( 2 ) + z( 2 )*d( 1 ) + end if + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + tau = b / a + else if( a<=zero ) then + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( tau < lbd .or. tau > ubd )tau = ( lbd+ubd )/two + if( d(1)==tau .or. d(2)==tau .or. d(3)==tau ) then + tau = zero + else + temp = finit + tau*z(1)/( d(1)*( d( 1 )-tau ) ) +tau*z(2)/( d(2)*( d( 2 )-tau ) )& + +tau*z(3)/( d(3)*( d( 3 )-tau ) ) + if( temp <= zero )then + lbd = tau + else + ubd = tau + end if + if( abs( finit )<=abs( temp ) )tau = zero + end if + end if + ! get machine parameters for possible scaling to avoid overflow + ! modified by sven: parameters small1, sminv1, small2, + ! sminv2, eps are not saved anymore between one call to the + ! others but recomputed at each call + eps = stdlib_qlamch( 'EPSILON' ) + base = stdlib_qlamch( 'BASE' ) + small1 = base**( int( log( stdlib_qlamch( 'SAFMIN' ) ) / log( base ) /three,KIND=ilp) ) + + sminv1 = one / small1 + small2 = small1*small1 + sminv2 = sminv1*sminv1 + ! determine if scaling of inputs necessary to avoid overflow + ! when computing 1/temp**3 + if( orgati ) then + temp = min( abs( d( 2 )-tau ), abs( d( 3 )-tau ) ) + else + temp = min( abs( d( 1 )-tau ), abs( d( 2 )-tau ) ) + end if + scale = .false. + if( temp<=small1 ) then + scale = .true. + if( temp<=small2 ) then + ! scale up by power of radix nearest 1/safmin**(2/3) + sclfac = sminv2 + sclinv = small2 + else + ! scale up by power of radix nearest 1/safmin**(1/3) + sclfac = sminv1 + sclinv = small1 + end if + ! scaling up safe because d, z, tau scaled elsewhere to be o(1) + do i = 1, 3 + dscale( i ) = d( i )*sclfac + zscale( i ) = z( i )*sclfac + end do + tau = tau*sclfac + lbd = lbd*sclfac + ubd = ubd*sclfac + else + ! copy d and z to dscale and zscale + do i = 1, 3 + dscale( i ) = d( i ) + zscale( i ) = z( i ) + end do + end if + fc = zero + df = zero + ddf = zero + do i = 1, 3 + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + fc = fc + temp1 / dscale( i ) + df = df + temp2 + ddf = ddf + temp3 + end do + f = finit + tau*fc + if( abs( f )<=zero )go to 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + ! iteration begins -- use gragg-thornton-warner cubic convergent + ! scheme + ! it is not hard to see that + ! 1) iterations will go up monotonically + ! if finit < 0; + ! 2) iterations will go down monotonically + ! if finit > 0. + iter = niter + 1 + loop_50: do niter = iter, maxit + if( orgati ) then + temp1 = dscale( 2 ) - tau + temp2 = dscale( 3 ) - tau + else + temp1 = dscale( 1 ) - tau + temp2 = dscale( 2 ) - tau + end if + a = ( temp1+temp2 )*f - temp1*temp2*df + b = temp1*temp2*f + c = f - ( temp1+temp2 )*df + temp1*temp2*ddf + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( f*eta>=zero ) then + eta = -f / df + end if + tau = tau + eta + if( tau < lbd .or. tau > ubd )tau = ( lbd + ubd )/two + fc = zero + erretm = zero + df = zero + ddf = zero + do i = 1, 3 + if ( ( dscale( i )-tau )/=zero ) then + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + temp4 = temp1 / dscale( i ) + fc = fc + temp4 + erretm = erretm + abs( temp4 ) + df = df + temp2 + ddf = ddf + temp3 + else + go to 60 + end if + end do + f = finit + tau*fc + erretm = eight*( abs( finit )+abs( tau )*erretm ) +abs( tau )*df + if( ( abs( f )<=four*eps*erretm ) .or.( (ubd-lbd)<=four*eps*abs(tau) ) )go to & + 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + end do loop_50 + info = 1 + 60 continue + ! undo scaling + if( scale )tau = tau*sclinv + return + end subroutine stdlib_qlaed6 + + !> DLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense symmetric matrix + !> that has been reduced to tridiagonal form. DLAED1 handles + !> the case in which all eigenvalues and eigenvectors of a symmetric + !> tridiagonal matrix are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**Tu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED8. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by DLAED9). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_qlaed7( icompq, n, qsiz, tlvls, curlvl, curpbm, d, q,ldq, indxq, rho, & + cutpnt, qstore, qptr, prmptr,perm, givptr, givcol, givnum, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, icompq, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(qp), intent(inout) :: d(*), givnum(2,*), q(ldq,*), qstore(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, ldq2, & + n1, n2, ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( icompq==1 .and. qsizcutpnt .or. n DLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_qlaed8( icompq, k, n, qsiz, d, q, ldq, indxq, rho,cutpnt, z, dlamda, & + q2, ldq2, w, perm, givptr,givcol, givnum, indxp, indx, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, icompq, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(qp), intent(inout) :: d(*), q(ldq,*), z(*) + real(qp), intent(out) :: dlamda(*), givnum(2,*), q2(ldq2,*), w(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: mone = -1.0_qp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(qp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -3 + else if( icompq==1 .and. qsizn ) then + info = -10 + else if( ldq2n )go to 100 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_qlapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + if( icompq==1 ) then + call stdlib_qrot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j & + ) ) ), 1, c, s ) + end if + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( jlam ) DLAED9: finds the roots of the secular equation, as defined by the + !> values in D, Z, and RHO, between KSTART and KSTOP. It makes the + !> appropriate calls to DLAED4 and then stores the new matrix of + !> eigenvectors for use in calculating the next level of Z vectors. + + pure subroutine stdlib_qlaed9( k, kstart, kstop, n, d, q, ldq, rho, dlamda, w,s, lds, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, kstart, kstop, ldq, lds, n + real(qp), intent(in) :: rho + ! Array Arguments + real(qp), intent(out) :: d(*), q(ldq,*), s(lds,*) + real(qp), intent(inout) :: dlamda(*), w(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( kstart<1 .or. kstart>max( 1, k ) ) then + info = -2 + else if( max( 1, kstop )max( 1, k ) )then + info = -3 + else if( n DLAEDA: computes the Z vector corresponding to the merge step in the + !> CURLVLth step of the merge process with TLVLS steps for the CURPBMth + !> problem. + + pure subroutine stdlib_qlaeda( n, tlvls, curlvl, curpbm, prmptr, perm, givptr,givcol, givnum,& + q, qptr, z, ztemp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, n, tlvls + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + real(qp), intent(in) :: givnum(2,*), q(*) + real(qp), intent(out) :: z(*), ztemp(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bsiz1, bsiz2, curr, i, k, mid, psiz1, psiz2, ptr, zptr1 + ! Intrinsic Functions + intrinsic :: real,int,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAEDA', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine location of first number in second half. + mid = n / 2 + 1 + ! gather last/first rows of appropriate eigenblocks into center of z + ptr = 1 + ! determine location of lowest level subproblem in the full storage + ! scheme + curr = ptr + curpbm*2**curlvl + 2**( curlvl-1 ) - 1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these square + ! roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=qp) ),KIND=ilp) + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=qp) ),KIND=ilp) + + do k = 1, mid - bsiz1 - 1 + z( k ) = zero + end do + call stdlib_qcopy( bsiz1, q( qptr( curr )+bsiz1-1 ), bsiz1,z( mid-bsiz1 ), 1 ) + call stdlib_qcopy( bsiz2, q( qptr( curr+1 ) ), bsiz2, z( mid ), 1 ) + do k = mid + bsiz2, n + z( k ) = zero + end do + ! loop through remaining levels 1 -> curlvl applying the givens + ! rotations and permutation and then multiplying the center matrices + ! against the current z. + ptr = 2**tlvls + 1 + loop_70: do k = 1, curlvl - 1 + curr = ptr + curpbm*2**( curlvl-k ) + 2**( curlvl-k-1 ) - 1 + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + zptr1 = mid - psiz1 + ! apply givens at curr and curr+1 + do i = givptr( curr ), givptr( curr+1 ) - 1 + call stdlib_qrot( 1, z( zptr1+givcol( 1, i )-1 ), 1,z( zptr1+givcol( 2, i )-1 ), & + 1, givnum( 1, i ),givnum( 2, i ) ) + end do + do i = givptr( curr+1 ), givptr( curr+2 ) - 1 + call stdlib_qrot( 1, z( mid-1+givcol( 1, i ) ), 1,z( mid-1+givcol( 2, i ) ), 1, & + givnum( 1, i ),givnum( 2, i ) ) + end do + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + do i = 0, psiz1 - 1 + ztemp( i+1 ) = z( zptr1+perm( prmptr( curr )+i )-1 ) + end do + do i = 0, psiz2 - 1 + ztemp( psiz1+i+1 ) = z( mid+perm( prmptr( curr+1 )+i )-1 ) + end do + ! multiply blocks at curr and curr+1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these + ! square roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=qp) ),KIND=ilp) + + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=qp) ),KIND=ilp) + + if( bsiz1>0 ) then + call stdlib_qgemv( 'T', bsiz1, bsiz1, one, q( qptr( curr ) ),bsiz1, ztemp( 1 ), & + 1, zero, z( zptr1 ), 1 ) + end if + call stdlib_qcopy( psiz1-bsiz1, ztemp( bsiz1+1 ), 1, z( zptr1+bsiz1 ),1 ) + if( bsiz2>0 ) then + call stdlib_qgemv( 'T', bsiz2, bsiz2, one, q( qptr( curr+1 ) ),bsiz2, ztemp( & + psiz1+1 ), 1, zero, z( mid ), 1 ) + end if + call stdlib_qcopy( psiz2-bsiz2, ztemp( psiz1+bsiz2+1 ), 1,z( mid+bsiz2 ), 1 ) + + ptr = ptr + 2**( tlvls-k ) + end do loop_70 + return + end subroutine stdlib_qlaeda + + !> DLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg + !> matrix H. + + pure subroutine stdlib_qlaein( rightv, noinit, n, h, ldh, wr, wi, vr, vi, b,ldb, work, eps3, & + smlnum, bignum, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(qp), intent(in) :: bignum, eps3, smlnum, wi, wr + ! Array Arguments + real(qp), intent(out) :: b(ldb,*), work(*) + real(qp), intent(in) :: h(ldh,*) + real(qp), intent(inout) :: vi(*), vr(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: tenth = 1.0e-1_qp + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, i1, i2, i3, ierr, its, j + real(qp) :: absbii, absbjj, ei, ej, growto, norm, nrmsml, rec, rootn, scale, temp, & + vcrit, vmax, vnorm, w, w1, x, xi, xr, y + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=qp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - (wr,wi)*i (except that the subdiagonal elements and + ! the imaginary parts of the diagonal elements are not stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - wr + end do + if( wi==zero ) then + ! real eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_qnrm2( n, vr, 1 ) + call stdlib_qscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), vr,1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( abs( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + temp = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + do i = 2, n + vr( i ) = temp + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_iqamax( n, vr, 1 ) + call stdlib_qscal( n, one / abs( vr( i ) ), vr, 1 ) + else + ! complex eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + vi( i ) = zero + end do + else + ! scale supplied initial vector. + norm = stdlib_qlapy2( stdlib_qnrm2( n, vr, 1 ), stdlib_qnrm2( n, vi, 1 ) ) + + rec = ( eps3*rootn ) / max( norm, nrmsml ) + call stdlib_qscal( n, rec, vr, 1 ) + call stdlib_qscal( n, rec, vi, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + ! the imaginary part of the (i,j)-th element of u is stored in + ! b(j+1,i). + b( 2, 1 ) = -wi + do i = 2, n + b( i+1, 1 ) = zero + end do + loop_170: do i = 1, n - 1 + absbii = stdlib_qlapy2( b( i, i ), b( i+1, i ) ) + ei = h( i+1, i ) + if( absbiivcrit ) then + rec = one / vmax + call stdlib_qscal( n, rec, vr, 1 ) + call stdlib_qscal( n, rec, vi, 1 ) + scale = scale*rec + vmax = one + vcrit = bignum + end if + xr = vr( i ) + xi = vi( i ) + if( rightv ) then + do j = i + 1, n + xr = xr - b( i, j )*vr( j ) + b( j+1, i )*vi( j ) + xi = xi - b( i, j )*vi( j ) - b( j+1, i )*vr( j ) + end do + else + do j = 1, i - 1 + xr = xr - b( j, i )*vr( j ) + b( i+1, j )*vi( j ) + xi = xi - b( j, i )*vi( j ) - b( i+1, j )*vr( j ) + end do + end if + w = abs( b( i, i ) ) + abs( b( i+1, i ) ) + if( w>smlnum ) then + if( ww*bignum ) then + rec = one / w1 + call stdlib_qscal( n, rec, vr, 1 ) + call stdlib_qscal( n, rec, vi, 1 ) + xr = vr( i ) + xi = vi( i ) + scale = scale*rec + vmax = vmax*rec + end if + end if + ! divide by diagonal element of b. + call stdlib_qladiv( xr, xi, b( i, i ), b( i+1, i ), vr( i ),vi( i ) ) + + vmax = max( abs( vr( i ) )+abs( vi( i ) ), vmax ) + vcrit = bignum / vmax + else + do j = 1, n + vr( j ) = zero + vi( j ) = zero + end do + vr( i ) = one + vi( i ) = one + scale = zero + vmax = one + vcrit = bignum + end if + end do loop_250 + ! test for sufficient growth in the norm of (vr,vi). + vnorm = stdlib_qasum( n, vr, 1 ) + stdlib_qasum( n, vi, 1 ) + if( vnorm>=growto*scale )go to 280 + ! choose a new orthogonal starting vector and try again. + y = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + vi( 1 ) = zero + do i = 2, n + vr( i ) = y + vi( i ) = zero + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do loop_270 + ! failure to find eigenvector in n iterations + info = 1 + 280 continue + ! normalize eigenvector. + vnorm = zero + do i = 1, n + vnorm = max( vnorm, abs( vr( i ) )+abs( vi( i ) ) ) + end do + call stdlib_qscal( n, one / vnorm, vr, 1 ) + call stdlib_qscal( n, one / vnorm, vi, 1 ) + end if + return + end subroutine stdlib_qlaein + + !> DLAEV2: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_qlaev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: a, b, c + real(qp), intent(out) :: cs1, rt1, rt2, sn1 + ! ===================================================================== + + + + + ! Local Scalars + integer(ilp) :: sgn1, sgn2 + real(qp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + sgn1 = 1 + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + sgn1 = 1 + end if + ! compute the eigenvector + if( df>=zero ) then + cs = df + rt + sgn2 = 1 + else + cs = df - rt + sgn2 = -1 + end if + acs = abs( cs ) + if( acs>ab ) then + ct = -tb / cs + sn1 = one / sqrt( one+ct*ct ) + cs1 = ct*sn1 + else + if( ab==zero ) then + cs1 = one + sn1 = zero + else + tn = -cs / tb + cs1 = one / sqrt( one+tn*tn ) + sn1 = tn*cs1 + end if + end if + if( sgn1==sgn2 ) then + tn = cs1 + cs1 = -sn1 + sn1 = tn + end if + return + end subroutine stdlib_qlaev2 + + !> DLAEXC: swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + !> an upper quasi-triangular matrix T by an orthogonal similarity + !> transformation. + !> T must be in Schur canonical form, that is, block upper triangular + !> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + !> has its diagonal elements equal and its off-diagonal elements of + !> opposite sign. + + subroutine stdlib_qlaexc( wantq, n, t, ldt, q, ldq, j1, n1, n2, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, ldq, ldt, n, n1, n2 + ! Array Arguments + real(qp), intent(inout) :: q(ldq,*), t(ldt,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldd = 4 + integer(ilp), parameter :: ldx = 2 + + + + ! Local Scalars + integer(ilp) :: ierr, j2, j3, j4, k, nd + real(qp) :: cs, dnorm, eps, scale, smlnum, sn, t11, t22, t33, tau, tau1, tau2, temp, & + thresh, wi1, wi2, wr1, wr2, xnorm + ! Local Arrays + real(qp) :: d(ldd,4), u(3), u1(3), u2(3), x(ldx,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 .or. n1==0 .or. n2==0 )return + if( j1+n1>n )return + j2 = j1 + 1 + j3 = j1 + 2 + j4 = j1 + 3 + if( n1==1 .and. n2==1 ) then + ! swap two 1-by-1 blocks. + t11 = t( j1, j1 ) + t22 = t( j2, j2 ) + ! determine the transformation to perform the interchange. + call stdlib_qlartg( t( j1, j2 ), t22-t11, cs, sn, temp ) + ! apply transformation to the matrix t. + if( j3<=n )call stdlib_qrot( n-j1-1, t( j1, j3 ), ldt, t( j2, j3 ), ldt, cs,sn ) + + call stdlib_qrot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + t( j1, j1 ) = t22 + t( j2, j2 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_qrot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + else + ! swapping involves at least one 2-by-2 block. + ! copy the diagonal block of order n1+n2 to the local array d + ! and compute its norm. + nd = n1 + n2 + call stdlib_qlacpy( 'FULL', nd, nd, t( j1, j1 ), ldt, d, ldd ) + dnorm = stdlib_qlange( 'MAX', nd, nd, d, ldd, work ) + ! compute machine-dependent threshold for test for accepting + ! swap. + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + thresh = max( ten*eps*dnorm, smlnum ) + ! solve t11*x - x*t22 = scale*t12 for x. + call stdlib_qlasy2( .false., .false., -1, n1, n2, d, ldd,d( n1+1, n1+1 ), ldd, d( 1,& + n1+1 ), ldd, scale, x,ldx, xnorm, ierr ) + ! swap the adjacent diagonal blocks. + k = n1 + n1 + n2 - 3 + go to ( 10, 20, 30 )k + 10 continue + ! n1 = 1, n2 = 2: generate elementary reflector h so that: + ! ( scale, x11, x12 ) h = ( 0, 0, * ) + u( 1 ) = scale + u( 2 ) = x( 1, 1 ) + u( 3 ) = x( 1, 2 ) + call stdlib_qlarfg( 3, u( 3 ), u, 1, tau ) + u( 3 ) = one + t11 = t( j1, j1 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_qlarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_qlarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 3,3 )-t11 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_qlarfx( 'L', 3, n-j1+1, u, tau, t( j1, j1 ), ldt, work ) + call stdlib_qlarfx( 'R', j2, 3, u, tau, t( 1, j1 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j3, j3 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_qlarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 20 continue + ! n1 = 2, n2 = 1: generate elementary reflector h so that: + ! h ( -x11 ) = ( * ) + ! ( -x21 ) = ( 0 ) + ! ( scale ) = ( 0 ) + u( 1 ) = -x( 1, 1 ) + u( 2 ) = -x( 2, 1 ) + u( 3 ) = scale + call stdlib_qlarfg( 3, u( 1 ), u( 2 ), 1, tau ) + u( 1 ) = one + t33 = t( j3, j3 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_qlarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_qlarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 2, 1 ) ), abs( d( 3, 1 ) ), abs( d( 1,1 )-t33 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_qlarfx( 'R', j3, 3, u, tau, t( 1, j1 ), ldt, work ) + call stdlib_qlarfx( 'L', 3, n-j1, u, tau, t( j1, j2 ), ldt, work ) + t( j1, j1 ) = t33 + t( j2, j1 ) = zero + t( j3, j1 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_qlarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 30 continue + ! n1 = 2, n2 = 2: generate elementary reflectors h(1) and h(2) so + ! that: + ! h(2) h(1) ( -x11 -x12 ) = ( * * ) + ! ( -x21 -x22 ) ( 0 * ) + ! ( scale 0 ) ( 0 0 ) + ! ( 0 scale ) ( 0 0 ) + u1( 1 ) = -x( 1, 1 ) + u1( 2 ) = -x( 2, 1 ) + u1( 3 ) = scale + call stdlib_qlarfg( 3, u1( 1 ), u1( 2 ), 1, tau1 ) + u1( 1 ) = one + temp = -tau1*( x( 1, 2 )+u1( 2 )*x( 2, 2 ) ) + u2( 1 ) = -temp*u1( 2 ) - x( 2, 2 ) + u2( 2 ) = -temp*u1( 3 ) + u2( 3 ) = scale + call stdlib_qlarfg( 3, u2( 1 ), u2( 2 ), 1, tau2 ) + u2( 1 ) = one + ! perform swap provisionally on diagonal block in d. + call stdlib_qlarfx( 'L', 3, 4, u1, tau1, d, ldd, work ) + call stdlib_qlarfx( 'R', 4, 3, u1, tau1, d, ldd, work ) + call stdlib_qlarfx( 'L', 3, 4, u2, tau2, d( 2, 1 ), ldd, work ) + call stdlib_qlarfx( 'R', 4, 3, u2, tau2, d( 1, 2 ), ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 4, 1 ) ),abs( d( 4, 2 ) ) )& + >thresh )go to 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_qlarfx( 'L', 3, n-j1+1, u1, tau1, t( j1, j1 ), ldt, work ) + call stdlib_qlarfx( 'R', j4, 3, u1, tau1, t( 1, j1 ), ldt, work ) + call stdlib_qlarfx( 'L', 3, n-j1+1, u2, tau2, t( j2, j1 ), ldt, work ) + call stdlib_qlarfx( 'R', j4, 3, u2, tau2, t( 1, j2 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j4, j1 ) = zero + t( j4, j2 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_qlarfx( 'R', n, 3, u1, tau1, q( 1, j1 ), ldq, work ) + call stdlib_qlarfx( 'R', n, 3, u2, tau2, q( 1, j2 ), ldq, work ) + end if + 40 continue + if( n2==2 ) then + ! standardize new 2-by-2 block t11 + call stdlib_qlanv2( t( j1, j1 ), t( j1, j2 ), t( j2, j1 ),t( j2, j2 ), wr1, wi1, & + wr2, wi2, cs, sn ) + call stdlib_qrot( n-j1-1, t( j1, j1+2 ), ldt, t( j2, j1+2 ), ldt,cs, sn ) + call stdlib_qrot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + if( wantq )call stdlib_qrot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + if( n1==2 ) then + ! standardize new 2-by-2 block t22 + j3 = j1 + n2 + j4 = j3 + 1 + call stdlib_qlanv2( t( j3, j3 ), t( j3, j4 ), t( j4, j3 ),t( j4, j4 ), wr1, wi1, & + wr2, wi2, cs, sn ) + if( j3+2<=n )call stdlib_qrot( n-j3-1, t( j3, j3+2 ), ldt, t( j4, j3+2 ),ldt, cs,& + sn ) + call stdlib_qrot( j3-1, t( 1, j3 ), 1, t( 1, j4 ), 1, cs, sn ) + if( wantq )call stdlib_qrot( n, q( 1, j3 ), 1, q( 1, j4 ), 1, cs, sn ) + end if + end if + return + ! exit with info = 1 if swap was rejected. + 50 continue + info = 1 + return + end subroutine stdlib_qlaexc + + !> DLAG2: computes the eigenvalues of a 2 x 2 generalized eigenvalue + !> problem A - w B, with scaling as necessary to avoid over-/underflow. + !> The scaling factor "s" results in a modified eigenvalue equation + !> s A - w B + !> where s is a non-negative scaling factor chosen so that w, w B, + !> and s A do not overflow and, if possible, do not underflow, either. + + pure subroutine stdlib_qlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(qp), intent(in) :: safmin + real(qp), intent(out) :: scale1, scale2, wi, wr1, wr2 + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: fuzzy1 = one+1.0e-5_qp + + + + ! Local Scalars + real(qp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, & + binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,& + rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, & + wsmall + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + rtmin = sqrt( safmin ) + rtmax = one / rtmin + safmax = one / safmin + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a11 = ascale*a( 1, 1 ) + a21 = ascale*a( 2, 1 ) + a12 = ascale*a( 1, 2 ) + a22 = ascale*a( 2, 2 ) + ! perturb b if necessary to insure non-singularity + b11 = b( 1, 1 ) + b12 = b( 1, 2 ) + b22 = b( 2, 2 ) + bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin ) + if( abs( b11 )=one ) then + discr = ( rtmin*pp )**2 + qq*safmin + r = sqrt( abs( discr ) )*rtmax + else + if( pp**2+abs( qq )<=safmin ) then + discr = ( rtmax*pp )**2 + qq*safmax + r = sqrt( abs( discr ) )*rtmin + else + discr = pp**2 + qq + r = sqrt( abs( discr ) ) + end if + end if + ! note: the test of r in the following if is to cover the case when + ! discr is small and negative and is flushed to zero during + ! the calculation of r. on machines which have a consistent + ! flush-to-zero threshold and handle numbers above that + ! threshold correctly, it would not be necessary. + if( discr>=zero .or. r==zero ) then + sum = pp + sign( r, pp ) + diff = pp - sign( r, pp ) + wbig = shift + sum + ! compute smaller eigenvalue + wsmall = shift + diff + if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then + wdet = ( a11*a22-a12*a21 )*( binv11*binv22 ) + wsmall = wdet / wbig + end if + ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1) + ! for wr1. + if( pp>abi22 ) then + wr1 = min( wbig, wsmall ) + wr2 = max( wbig, wsmall ) + else + wr1 = max( wbig, wsmall ) + wr2 = min( wbig, wsmall ) + end if + wi = zero + else + ! complex eigenvalues + wr1 = shift + pp + wr2 = wr1 + wi = r + end if + ! further scaling to avoid underflow and overflow in computing + ! scale1 and overflow in computing w*b. + ! this scale factor (wscale) is bounded from above using c1 and c2, + ! and from below using c3 and c4. + ! c1 implements the condition s a must never overflow. + ! c2 implements the condition w b must never overflow. + ! c3, with c2, + ! implement the condition that s a - w b must never overflow. + ! c4 implements the condition s should not underflow. + ! c5 implements the condition max(s,|w|) should be at least 2. + c1 = bsize*( safmin*max( one, ascale ) ) + c2 = safmin*max( one, bnorm ) + c3 = bsize*safmin + if( ascale<=one .and. bsize<=one ) then + c4 = min( one, ( ascale / safmin )*bsize ) + else + c4 = one + end if + if( ascale<=one .or. bsize<=one ) then + c5 = min( one, ascale*bsize ) + else + c5 = one + end if + ! scale first eigenvalue + wabs = abs( wr1 ) + abs( wi ) + wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) ) + + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr1 = wr1*wscale + if( wi/=zero ) then + wi = wi*wscale + wr2 = wr1 + scale2 = scale1 + end if + else + scale1 = ascale*bsize + scale2 = scale1 + end if + ! scale second eigenvalue (if real) + if( wi==zero ) then + wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), & + c5 ) ) ) + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr2 = wr2*wscale + else + scale2 = ascale*bsize + end if + end if + return + end subroutine stdlib_qlag2 + + !> DLAG2S: converts a DOUBLE PRECISION matrix, SA, to a SINGLE + !> PRECISION matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> DLAG2S checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_qlag2s( m, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + real(dp), intent(out) :: sa(ldsa,*) + real(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: rmax + ! Executable Statements + rmax = stdlib_dlamch( 'O' ) + do j = 1, n + do i = 1, m + if( ( a( i, j )<-rmax ) .or. ( a( i, j )>rmax ) ) then + info = 1 + go to 30 + end if + sa( i, j ) = a( i, j ) + end do + end do + info = 0 + 30 continue + return + end subroutine stdlib_qlag2s + + !> DLAGS2: computes 2-by-2 orthogonal matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**T *A*Q = U**T *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**T*B*Q = V**T *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**T *A*Q = U**T *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**T*B*Q = V**T*( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> The rows of the transformed A and B are parallel, where + !> U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) + !> ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) + !> Z**T denotes the transpose of Z. + + pure subroutine stdlib_qlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(qp), intent(in) :: a1, a2, a3, b1, b2, b3 + real(qp), intent(out) :: csq, csu, csv, snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(qp) :: a, aua11, aua12, aua21, aua22, avb11, avb12, avb21, avb22, b, c, csl, csr, & + d, r, s1, s2, snl, snr, ua11, ua11r, ua12, ua21, ua22, ua22r, vb11, vb11r, vb12, vb21, & + vb22, vb22r + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_qlasv2( a, b, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,2) element of |u|**t *|a| and |v|**t *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + snr*b3 + aua12 = abs( csl )*abs( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**t *a and v**t *b + if( ( abs( ua11r )+abs( ua12 ) )/=zero ) then + if( aua12 / ( abs( ua11r )+abs( ua12 ) )<=avb12 /( abs( vb11r )+abs( vb12 ) ) & + ) then + call stdlib_qlartg( -ua11r, ua12, csq, snq, r ) + else + call stdlib_qlartg( -vb11r, vb12, csq, snq, r ) + end if + else + call stdlib_qlartg( -vb11r, vb12, csq, snq, r ) + end if + csu = csl + snu = -snl + csv = csr + snv = -snr + else + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,2) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snl*a1 + ua22 = -snl*a2 + csl*a3 + vb21 = -snr*b1 + vb22 = -snr*b2 + csr*b3 + aua22 = abs( snl )*abs( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua21 )+abs( ua22 ) )/=zero ) then + if( aua22 / ( abs( ua21 )+abs( ua22 ) )<=avb22 /( abs( vb21 )+abs( vb22 ) ) ) & + then + call stdlib_qlartg( -ua21, ua22, csq, snq, r ) + else + call stdlib_qlartg( -vb21, vb22, csq, snq, r ) + end if + else + call stdlib_qlartg( -vb21, vb22, csq, snq, r ) + end if + csu = snl + snu = csl + csv = snr + snv = csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_qlasv2( a, c, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,1) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs( b2 ) + ! zero (2,1) elements of u**t *a and v**t *b. + if( ( abs( ua21 )+abs( ua22r ) )/=zero ) then + if( aua21 / ( abs( ua21 )+abs( ua22r ) )<=avb21 /( abs( vb21 )+abs( vb22r ) ) & + ) then + call stdlib_qlartg( ua22r, ua21, csq, snq, r ) + else + call stdlib_qlartg( vb22r, vb21, csq, snq, r ) + end if + else + call stdlib_qlartg( vb22r, vb21, csq, snq, r ) + end if + csu = csr + snu = -snr + csv = csl + snv = -snl + else + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,1) element of |u|**t *|a| and |v|**t *|b|. + ua11 = csr*a1 + snr*a2 + ua12 = snr*a3 + vb11 = csl*b1 + snl*b2 + vb12 = snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs( b2 ) + ! zero (1,1) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua11 )+abs( ua12 ) )/=zero ) then + if( aua11 / ( abs( ua11 )+abs( ua12 ) )<=avb11 /( abs( vb11 )+abs( vb12 ) ) ) & + then + call stdlib_qlartg( ua12, ua11, csq, snq, r ) + else + call stdlib_qlartg( vb12, vb11, csq, snq, r ) + end if + else + call stdlib_qlartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = csr + csv = snl + snv = csl + end if + end if + return + end subroutine stdlib_qlags2 + + !> DLAGTF: factorizes the matrix (T - lambda*I), where T is an n by n + !> tridiagonal matrix and lambda is a scalar, as + !> T - lambda*I = PLU, + !> where P is a permutation matrix, L is a unit lower tridiagonal matrix + !> with at most one non-zero sub-diagonal elements per column and U is + !> an upper triangular matrix with at most two non-zero super-diagonal + !> elements per column. + !> The factorization is obtained by Gaussian elimination with partial + !> pivoting and implicit row scaling. + !> The parameter LAMBDA is included in the routine so that DLAGTF may + !> be used, in conjunction with DLAGTS, to obtain eigenvectors of T by + !> inverse iteration. + + pure subroutine stdlib_qlagtf( n, a, lambda, b, c, tol, d, in, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: lambda, tol + ! Array Arguments + integer(ilp), intent(out) :: in(*) + real(qp), intent(inout) :: a(*), b(*), c(*) + real(qp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(qp) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLAGTF', -info ) + return + end if + if( n==0 )return + a( 1 ) = a( 1 ) - lambda + in( n ) = 0 + if( n==1 ) then + if( a( 1 )==zero )in( 1 ) = 1 + return + end if + eps = stdlib_qlamch( 'EPSILON' ) + tl = max( tol, eps ) + scale1 = abs( a( 1 ) ) + abs( b( 1 ) ) + loop_10: do k = 1, n - 1 + a( k+1 ) = a( k+1 ) - lambda + scale2 = abs( c( k ) ) + abs( a( k+1 ) ) + if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) ) + if( a( k )==zero ) then + piv1 = zero + else + piv1 = abs( a( k ) ) / scale1 + end if + if( c( k )==zero ) then + in( k ) = 0 + piv2 = zero + scale1 = scale2 + if( k<( n-1 ) )d( k ) = zero + else + piv2 = abs( c( k ) ) / scale2 + if( piv2<=piv1 ) then + in( k ) = 0 + scale1 = scale2 + c( k ) = c( k ) / a( k ) + a( k+1 ) = a( k+1 ) - c( k )*b( k ) + if( k<( n-1 ) )d( k ) = zero + else + in( k ) = 1 + mult = a( k ) / c( k ) + a( k ) = c( k ) + temp = a( k+1 ) + a( k+1 ) = b( k ) - mult*temp + if( k<( n-1 ) ) then + d( k ) = b( k+1 ) + b( k+1 ) = -mult*d( k ) + end if + b( k ) = temp + c( k ) = mult + end if + end if + if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0 ) )in( n ) = k + end do loop_10 + if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0 ) )in( n ) = n + return + end subroutine stdlib_qlagtf + + !> DLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_qlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(in) :: alpha, beta + ! Array Arguments + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b + a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b - a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_qlagtm + + !> DLAGTS: may be used to solve one of the systems of equations + !> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, + !> where T is an n by n tridiagonal matrix, for x, following the + !> factorization of (T - lambda*I) as + !> (T - lambda*I) = P*L*U , + !> by routine DLAGTF. The choice of equation to be solved is + !> controlled by the argument JOB, and in each case there is an option + !> to perturb zero or very small diagonal elements of U, this option + !> being intended for use in applications such as inverse iteration. + + pure subroutine stdlib_qlagts( job, n, a, b, c, d, in, y, tol, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: job, n + real(qp), intent(inout) :: tol + ! Array Arguments + integer(ilp), intent(in) :: in(*) + real(qp), intent(in) :: a(*), b(*), c(*), d(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(qp) :: absak, ak, bignum, eps, pert, sfmin, temp + ! Intrinsic Functions + intrinsic :: abs,max,sign + ! Executable Statements + info = 0 + if( ( abs( job )>2 ) .or. ( job==0 ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAGTS', -info ) + return + end if + if( n==0 )return + eps = stdlib_qlamch( 'EPSILON' ) + sfmin = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / sfmin + if( job<0 ) then + if( tol<=zero ) then + tol = abs( a( 1 ) ) + if( n>1 )tol = max( tol, abs( a( 2 ) ), abs( b( 1 ) ) ) + do k = 3, n + tol = max( tol, abs( a( k ) ), abs( b( k-1 ) ),abs( d( k-2 ) ) ) + end do + tol = tol*eps + if( tol==zero )tol = eps + end if + end if + if( abs( job )==1 ) then + do k = 2, n + if( in( k-1 )==0 ) then + y( k ) = y( k ) - c( k-1 )*y( k-1 ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + if( job==1 ) then + loop_30: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_30 + else + loop_50: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 40 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 40 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 40 + end if + end if + y( k ) = temp / ak + end do loop_50 + end if + else + ! come to here if job = 2 or -2 + if( job==2 ) then + loop_60: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_60 + else + loop_80: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 70 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 70 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 70 + end if + end if + y( k ) = temp / ak + end do loop_80 + end if + do k = n, 2, -1 + if( in( k-1 )==0 ) then + y( k-1 ) = y( k-1 ) - c( k-1 )*y( k ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + end if + end subroutine stdlib_qlagts + + !> DLAGV2: computes the Generalized Schur factorization of a real 2-by-2 + !> matrix pencil (A,B) where B is upper triangular. This routine + !> computes orthogonal (rotation) matrices given by CSL, SNL and CSR, + !> SNR such that + !> 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 + !> types), then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], + !> 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, + !> then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] + !> where b11 >= b22 > 0. + + pure subroutine stdlib_qlagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(qp), intent(out) :: csl, csr, snl, snr + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: alphai(2), alphar(2), beta(2) + ! ===================================================================== + + ! Local Scalars + real(qp) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, & + scale2, t, ulp, wi, wr1, wr2 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + safmin = stdlib_qlamch( 'S' ) + ulp = stdlib_qlamch( 'P' ) + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a( 1, 1 ) = ascale*a( 1, 1 ) + a( 1, 2 ) = ascale*a( 1, 2 ) + a( 2, 1 ) = ascale*a( 2, 1 ) + a( 2, 2 ) = ascale*a( 2, 2 ) + ! scale b + bnorm = max( abs( b( 1, 1 ) ), abs( b( 1, 2 ) )+abs( b( 2, 2 ) ),safmin ) + bscale = one / bnorm + b( 1, 1 ) = bscale*b( 1, 1 ) + b( 1, 2 ) = bscale*b( 1, 2 ) + b( 2, 2 ) = bscale*b( 2, 2 ) + ! check if a can be deflated + if( abs( a( 2, 1 ) )<=ulp ) then + csl = one + snl = zero + csr = one + snr = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + ! check if b is singular + else if( abs( b( 1, 1 ) )<=ulp ) then + call stdlib_qlartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + csr = one + snr = zero + call stdlib_qrot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_qrot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 1, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + else if( abs( b( 2, 2 ) )<=ulp ) then + call stdlib_qlartg( a( 2, 2 ), a( 2, 1 ), csr, snr, t ) + snr = -snr + call stdlib_qrot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_qrot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + csl = one + snl = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + b( 2, 2 ) = zero + wi = zero + else + ! b is nonsingular, first compute the eigenvalues of (a,b) + call stdlib_qlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi ) + if( wi==zero ) then + ! two real eigenvalues, compute s*a-w*b + h1 = scale1*a( 1, 1 ) - wr1*b( 1, 1 ) + h2 = scale1*a( 1, 2 ) - wr1*b( 1, 2 ) + h3 = scale1*a( 2, 2 ) - wr1*b( 2, 2 ) + rr = stdlib_qlapy2( h1, h2 ) + qq = stdlib_qlapy2( scale1*a( 2, 1 ), h3 ) + if( rr>qq ) then + ! find right rotation matrix to zero 1,1 element of + ! (sa - wb) + call stdlib_qlartg( h2, h1, csr, snr, t ) + else + ! find right rotation matrix to zero 2,1 element of + ! (sa - wb) + call stdlib_qlartg( h3, scale1*a( 2, 1 ), csr, snr, t ) + end if + snr = -snr + call stdlib_qrot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_qrot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + ! compute inf norms of a and b + h1 = max( abs( a( 1, 1 ) )+abs( a( 1, 2 ) ),abs( a( 2, 1 ) )+abs( a( 2, 2 ) ) ) + + h2 = max( abs( b( 1, 1 ) )+abs( b( 1, 2 ) ),abs( b( 2, 1 ) )+abs( b( 2, 2 ) ) ) + + if( ( scale1*h1 )>=abs( wr1 )*h2 ) then + ! find left rotation matrix q to zero out b(2,1) + call stdlib_qlartg( b( 1, 1 ), b( 2, 1 ), csl, snl, r ) + else + ! find left rotation matrix q to zero out a(2,1) + call stdlib_qlartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + end if + call stdlib_qrot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_qrot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 2, 1 ) = zero + else + ! a pair of complex conjugate eigenvalues + ! first compute the svd of the matrix b + call stdlib_qlasv2( b( 1, 1 ), b( 1, 2 ), b( 2, 2 ), r, t, snr,csr, snl, csl ) + + ! form (a,b) := q(a,b)z**t where q is left rotation matrix and + ! z is right rotation matrix computed from stdlib_qlasv2 + call stdlib_qrot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_qrot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + call stdlib_qrot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_qrot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + b( 2, 1 ) = zero + b( 1, 2 ) = zero + end if + end if + ! unscaling + a( 1, 1 ) = anorm*a( 1, 1 ) + a( 2, 1 ) = anorm*a( 2, 1 ) + a( 1, 2 ) = anorm*a( 1, 2 ) + a( 2, 2 ) = anorm*a( 2, 2 ) + b( 1, 1 ) = bnorm*b( 1, 1 ) + b( 2, 1 ) = bnorm*b( 2, 1 ) + b( 1, 2 ) = bnorm*b( 1, 2 ) + b( 2, 2 ) = bnorm*b( 2, 2 ) + if( wi==zero ) then + alphar( 1 ) = a( 1, 1 ) + alphar( 2 ) = a( 2, 2 ) + alphai( 1 ) = zero + alphai( 2 ) = zero + beta( 1 ) = b( 1, 1 ) + beta( 2 ) = b( 2, 2 ) + else + alphar( 1 ) = anorm*wr1 / scale1 / bnorm + alphai( 1 ) = anorm*wi / scale1 / bnorm + alphar( 2 ) = alphar( 1 ) + alphai( 2 ) = -alphai( 1 ) + beta( 1 ) = one + beta( 2 ) = one + end if + return + end subroutine stdlib_qlagv2 + + !> DLAHQR: is an auxiliary routine called by DHSEQR to update the + !> eigenvalues and Schur decomposition already computed by DHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_qlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: wi(*), wr(*) + ! ========================================================= + ! Parameters + real(qp), parameter :: dat1 = 3.0_qp/4.0_qp + real(qp), parameter :: dat2 = -0.4375_qp + integer(ilp), parameter :: kexsh = 10 + + + + ! Local Scalars + real(qp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & + rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 + integer(ilp) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl + ! Local Arrays + real(qp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + wr( ilo ) = h( ilo, ilo ) + wi( ilo ) = zero + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = zero + h( j+3, j ) = zero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=qp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1 or 2. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 20 continue + l = ilo + if( i=ilo )tst = tst + abs( h( k-1, k-2 ) ) + if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some cases. ==== + if( abs( h( k, k-1 ) )<=ulp*tst ) then + ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 + end if + end do + 40 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = zero + end if + ! exit from loop if a submatrix of order 1 or 2 has split off. + if( l>=i-1 )go to 150 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + h11 = dat1*s + h( i, i ) + h12 = dat2*s + h21 = s + h22 = h11 + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) + h11 = dat1*s + h( l, l ) + h12 = dat2*s + h21 = s + h22 = h11 + else + ! prepare to use francis' double shift + ! (i.e. 2nd degree generalized rayleigh quotient) + h11 = h( i-1, i-1 ) + h21 = h( i, i-1 ) + h12 = h( i-1, i ) + h22 = h( i, i ) + end if + s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) + if( s==zero ) then + rt1r = zero + rt1i = zero + rt2r = zero + rt2i = zero + else + h11 = h11 / s + h21 = h21 / s + h12 = h12 / s + h22 = h22 / s + tr = ( h11+h22 ) / two + det = ( h11-tr )*( h22-tr ) - h12*h21 + rtdisc = sqrt( abs( det ) ) + if( det>=zero ) then + ! ==== complex conjugate shifts ==== + rt1r = tr*s + rt2r = rt1r + rt1i = rtdisc*s + rt2i = -rt1i + else + ! ==== realshifts (use only one of them,KIND=qp) ==== + rt1r = tr + rtdisc + rt2r = tr - rtdisc + if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then + rt1r = rt1r*s + rt2r = rt1r + else + rt2r = rt2r*s + rt1r = rt2r + end if + rt1i = zero + rt2i = zero + end if + end if + ! look for two consecutive small subdiagonal elements. + do m = i - 2, l, -1 + ! determine the effect of starting the double-shift qr + ! iteration at row m, and see if this would make h(m,m-1) + ! negligible. (the following uses scaling to avoid + ! overflows and most underflows.) + h21s = h( m+1, m ) + s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) + h21s = h( m+1, m ) / s + v( 1 ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & + rt1i*( rt2i / s ) + v( 2 ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) + v( 3 ) = h21s*h( m+2, m+1 ) + s = abs( v( 1 ) ) + abs( v( 2 ) ) + abs( v( 3 ) ) + v( 1 ) = v( 1 ) / s + v( 2 ) = v( 2 ) / s + v( 3 ) = v( 3 ) / s + if( m==l )go to 60 + if( abs( h( m, m-1 ) )*( abs( v( 2 ) )+abs( v( 3 ) ) )<=ulp*abs( v( 1 ) )*( abs( & + h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 + end do + 60 continue + ! double-shift qr step + loop_130: do k = m, i - 1 + ! the first iteration of this loop determines a reflection g + ! from the vector v and applies it from left and right to h, + ! thus creating a nonzero bulge below the subdiagonal. + ! each subsequent iteration determines a reflection g to + ! restore the hessenberg form in the (k-1)th column, and thus + ! chases the bulge one step toward the bottom of the active + ! submatrix. nr is the order of g. + nr = min( 3, i-k+1 ) + if( k>m )call stdlib_qcopy( nr, h( k, k-1 ), 1, v, 1 ) + call stdlib_qlarfg( nr, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = zero + if( kl ) then + ! ==== use the following instead of + ! . h( k, k-1 ) = -h( k, k-1 ) to + ! . avoid a bug when v(2) and v(3) + ! . underflow. ==== + h( k, k-1 ) = h( k, k-1 )*( one-t1 ) + end if + v2 = v( 2 ) + t2 = t1*v2 + if( nr==3 ) then + v3 = v( 3 ) + t3 = t1*v3 + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + h( k+2, j ) = h( k+2, j ) - sum*t3 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, min( k+3, i ) + sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + h( j, k+2 ) = h( j, k+2 ) - sum*t3 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + z( j, k+2 ) = z( j, k+2 ) - sum*t3 + end do + end if + else if( nr==2 ) then + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, i + sum = h( j, k ) + v2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + end do + end if + end if + end do loop_130 + end do loop_140 + ! failure to converge in remaining number of iterations + info = i + return + 150 continue + if( l==i ) then + ! h(i,i-1) is negligible: one eigenvalue has converged. + wr( i ) = h( i, i ) + wi( i ) = zero + else if( l==i-1 ) then + ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. + ! transform the 2-by-2 submatrix to standard schur form, + ! and compute and store the eigenvalues. + call stdlib_qlanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & + wi( i-1 ), wr( i ), wi( i ),cs, sn ) + if( wantt ) then + ! apply the transformation to the rest of h. + if( i2>i )call stdlib_qrot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) + + call stdlib_qrot( i-i1-1, h( i1, i-1 ), 1, h( i1, i ), 1, cs, sn ) + end if + if( wantz ) then + ! apply the transformation to z. + call stdlib_qrot( nz, z( iloz, i-1 ), 1, z( iloz, i ), 1, cs, sn ) + end if + end if + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 20 + 160 continue + return + end subroutine stdlib_qlahqr + + !> DLAHR2: reduces the first NB columns of A real general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an orthogonal similarity transformation + !> Q**T * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by DGEHRD. + + pure subroutine stdlib_qlahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**t + call stdlib_qgemv( 'NO TRANSPOSE', n-k, i-1, -one, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, one, a( k+1, i ), 1 ) + ! apply i - v * t**t * v**t to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**t * b1 + call stdlib_qcopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_qtrmv( 'LOWER', 'TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, t( 1, nb ),& + 1 ) + ! w := w + v2**t * b2 + call stdlib_qgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ),lda, a( k+i, i ), & + 1, one, t( 1, nb ), 1 ) + ! w := t**t * w + call stdlib_qtrmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, nb ), 1 ) + + ! b2 := b2 - v2*w + call stdlib_qgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -one,a( k+i, 1 ),lda, t( 1, nb )& + , 1, one, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_qtrmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_qaxpy( i-1, -one, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_qlarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = one + ! compute y(k+1:n,i) + call stdlib_qgemv( 'NO TRANSPOSE', n-k, n-k-i+1,one, a( k+1, i+1 ),lda, a( k+i, i ),& + 1, zero, y( k+1, i ), 1 ) + call stdlib_qgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ), lda,a( k+i, i ), 1, & + zero, t( 1, i ), 1 ) + call stdlib_qgemv( 'NO TRANSPOSE', n-k, i-1, -one,y( k+1, 1 ), ldy,t( 1, i ), 1, & + one, y( k+1, i ), 1 ) + call stdlib_qscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_qscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_qtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_qlacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,one, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, one,a( 1, & + 2+nb ), lda, a( k+1+nb, 1 ), lda, one, y,ldy ) + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,one, t, ldt, y, & + ldy ) + return + end subroutine stdlib_qlahr2 + + !> DLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then DLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**T gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**T and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ alpha ] + !> [ gamma ] + !> where alpha = x**T*w. + + pure subroutine stdlib_qlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(qp), intent(out) :: c, s, sestpr + real(qp), intent(in) :: gamma, sest + ! Array Arguments + real(qp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(qp) :: absalp, absest, absgam, alpha, b, cosine, eps, norma, s1, s2, sine, t, & + test, tmp, zeta1, zeta2 + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + eps = stdlib_qlamch( 'EPSILON' ) + alpha = stdlib_qdot( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + s = sqrt( one+tmp*tmp ) + sestpr = s2*s + c = ( gamma / s2 ) / s + s = sign( one, alpha ) / s + else + tmp = s2 / s1 + c = sqrt( one+tmp*tmp ) + sestpr = s1*c + s = ( alpha / s1 ) / c + c = sign( one, gamma ) / c + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = c / ( b+sqrt( b*b+c ) ) + else + t = sqrt( b*b+c ) - b + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -gamma + cosine = alpha + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + c = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / c ) + s = -( gamma / s2 ) / c + c = sign( one, alpha ) / c + else + tmp = s2 / s1 + s = sqrt( one+tmp*tmp ) + sestpr = absest / s + c = ( alpha / s1 ) / s + s = -sign( one, gamma ) / s + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + norma = max( one+zeta1*zeta1+abs( zeta1*zeta2 ),abs( zeta1*zeta2 )+zeta2*zeta2 ) + + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = c / ( b+sqrt( abs( b*b-c ) ) ) + sine = zeta1 / ( one-t ) + cosine = -zeta2 / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = -c / ( b+sqrt( b*b+c ) ) + else + t = b - sqrt( b*b+c ) + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_qlaic1 + + !> This routine is not for general use. It exists solely to avoid + !> over-optimization in DISNAN. + !> DLAISNAN: checks for NaNs by comparing its two arguments for + !> inequality. NaN is the only floating-point value where NaN != NaN + !> returns .TRUE. To check for NaNs, pass the same variable as both + !> arguments. + !> A compiler must assume that the two arguments are + !> not the same variable, and the test will not be optimized away. + !> Interprocedural or whole-program optimization may delete this + !> test. The ISNAN functions will be replaced by the correct + !> Fortran 03 intrinsic once the intrinsic is widely available. + + pure logical(lk) function stdlib_qlaisnan( din1, din2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: din1, din2 + ! ===================================================================== + ! Executable Statements + stdlib_qlaisnan = (din1/=din2) + return + end function stdlib_qlaisnan + + !> DLALN2: solves a system of the form (ca A - w D ) X = s B + !> or (ca A**T - w D) X = s B with possible scaling ("s") and + !> perturbation of A. (A**T means A-transpose.) + !> A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + !> real diagonal matrix, w is a real or complex value, and X and B are + !> NA x 1 matrices -- real if w is real, complex if w is complex. NA + !> may be 1 or 2. + !> If w is complex, X and B are represented as NA x 2 matrices, + !> the first column of each being the real part and the second + !> being the imaginary part. + !> "s" is a scaling factor (<= 1), computed by DLALN2, which is + !> so chosen that X can be computed without overflow. X is further + !> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + !> than overflow. + !> If both singular values of (ca A - w D) are less than SMIN, + !> SMIN*identity will be used instead of (ca A - w D). If only one + !> singular value is less than SMIN, one element of (ca A - w D) will be + !> perturbed enough to make the smallest singular value roughly SMIN. + !> If both singular values are at least SMIN, (ca A - w D) will not be + !> perturbed. In any case, the perturbation will be at most some small + !> multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + !> are computed by infinity-norm approximations, and thus will only be + !> correct to a factor of 2 or so. + !> Note: all input quantities are assumed to be smaller than overflow + !> by a reasonable factor. (See BIGNUM.) + + pure subroutine stdlib_qlaln2( ltrans, na, nw, smin, ca, a, lda, d1, d2, b,ldb, wr, wi, x, & + ldx, scale, xnorm, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ltrans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, na, nw + real(qp), intent(in) :: ca, d1, d2, smin, wi, wr + real(qp), intent(out) :: scale, xnorm + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: icmax, j + real(qp) :: bbnd, bi1, bi2, bignum, bnorm, br1, br2, ci21, ci22, cmax, cnorm, cr21, & + cr22, csi, csr, li21, lr21, smini, smlnum, temp, u22abs, ui11, ui11r, ui12, ui12s, & + ui22, ur11, ur11r, ur12, ur12s, ur22, xi1, xi2, xr1, xr2 + ! Local Arrays + logical(lk) :: rswap(4), zswap(4) + integer(ilp) :: ipivot(4,4) + real(qp) :: ci(2,2), civ(4), cr(2,2), crv(4) + ! Intrinsic Functions + intrinsic :: abs,max + ! Equivalences + equivalence ( ci( 1, 1 ), civ( 1 ) ),( cr( 1, 1 ), crv( 1 ) ) + ! Data Statements + zswap = [.false.,.false.,.true.,.true.] + rswap = [.false.,.true.,.false.,.true.] + ipivot = reshape([1,2,3,4,2,1,4,3,3,4,1,2,4,3,2,1],[4,4]) + ! Executable Statements + ! compute bignum + smlnum = two*stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + smini = max( smin, smlnum ) + ! don't check for input errors + info = 0 + ! standard initializations + scale = one + if( na==1 ) then + ! 1 x 1 (i.e., scalar) system c x = b + if( nw==1 ) then + ! real 1x1 system. + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + cnorm = abs( csr ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + x( 1, 1 ) = ( b( 1, 1 )*scale ) / csr + xnorm = abs( x( 1, 1 ) ) + else + ! complex 1x1 system (w is complex) + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + csi = -wi*d1 + cnorm = abs( csr ) + abs( csi ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + call stdlib_qladiv( scale*b( 1, 1 ), scale*b( 1, 2 ), csr, csi,x( 1, 1 ), x( 1, & + 2 ) ) + xnorm = abs( x( 1, 1 ) ) + abs( x( 1, 2 ) ) + end if + else + ! 2x2 system + ! compute the realpart of c = ca a - w d (or ca a**t - w d,KIND=qp) + cr( 1, 1 ) = ca*a( 1, 1 ) - wr*d1 + cr( 2, 2 ) = ca*a( 2, 2 ) - wr*d2 + if( ltrans ) then + cr( 1, 2 ) = ca*a( 2, 1 ) + cr( 2, 1 ) = ca*a( 1, 2 ) + else + cr( 2, 1 ) = ca*a( 2, 1 ) + cr( 1, 2 ) = ca*a( 1, 2 ) + end if + if( nw==1 ) then + ! real2x2 system (w is real,KIND=qp) + ! find the largest element in c + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )>cmax ) then + cmax = abs( crv( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ur11r = one / ur11 + lr21 = ur11r*cr21 + ur22 = cr22 - ur12*lr21 + ! if smaller pivot < smini, use smini + if( abs( ur22 )one .and. abs( ur22 )=bignum*abs( ur22 ) )scale = one / bbnd + end if + xr2 = ( br2*scale ) / ur22 + xr1 = ( scale*br1 )*ur11r - xr2*( ur11r*ur12 ) + if( zswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + end if + xnorm = max( abs( xr1 ), abs( xr2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + else + ! complex 2x2 system (w is complex) + ! find the largest element in c + ci( 1, 1 ) = -wi*d1 + ci( 2, 1 ) = zero + ci( 1, 2 ) = zero + ci( 2, 2 ) = -wi*d2 + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )+abs( civ( j ) )>cmax ) then + cmax = abs( crv( j ) ) + abs( civ( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + x( 1, 2 ) = temp*b( 1, 2 ) + x( 2, 2 ) = temp*b( 2, 2 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + ui11 = civ( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ci21 = civ( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + ui12 = civ( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ci22 = civ( ipivot( 4, icmax ) ) + if( icmax==1 .or. icmax==4 ) then + ! code when off-diagonals of pivoted c are real + if( abs( ur11 )>abs( ui11 ) ) then + temp = ui11 / ur11 + ur11r = one / ( ur11*( one+temp**2 ) ) + ui11r = -temp*ur11r + else + temp = ur11 / ui11 + ui11r = -one / ( ui11*( one+temp**2 ) ) + ur11r = -temp*ui11r + end if + lr21 = cr21*ur11r + li21 = cr21*ui11r + ur12s = ur12*ur11r + ui12s = ur12*ui11r + ur22 = cr22 - ur12*lr21 + ui22 = ci22 - ur12*li21 + else + ! code when diagonals of pivoted c are real + ur11r = one / ur11 + ui11r = zero + lr21 = cr21*ur11r + li21 = ci21*ur11r + ur12s = ur12*ur11r + ui12s = ui12*ur11r + ur22 = cr22 - ur12*lr21 + ui12*li21 + ui22 = -ur12*li21 - ui12*lr21 + end if + u22abs = abs( ur22 ) + abs( ui22 ) + ! if smaller pivot < smini, use smini + if( u22absone .and. u22abs=bignum*u22abs ) then + scale = one / bbnd + br1 = scale*br1 + bi1 = scale*bi1 + br2 = scale*br2 + bi2 = scale*bi2 + end if + end if + call stdlib_qladiv( br2, bi2, ur22, ui22, xr2, xi2 ) + xr1 = ur11r*br1 - ui11r*bi1 - ur12s*xr2 + ui12s*xi2 + xi1 = ui11r*br1 + ur11r*bi1 - ui12s*xr2 - ur12s*xi2 + if( zswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + x( 1, 2 ) = xi2 + x( 2, 2 ) = xi1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + x( 1, 2 ) = xi1 + x( 2, 2 ) = xi2 + end if + xnorm = max( abs( xr1 )+abs( xi1 ), abs( xr2 )+abs( xi2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + x( 1, 2 ) = temp*x( 1, 2 ) + x( 2, 2 ) = temp*x( 2, 2 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + end if + end if + return + end subroutine stdlib_qlaln2 + + !> DLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_qlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(qp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(out) :: bx(ldbx,*), work(*) + real(qp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, m, n, nlp1 + real(qp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb DLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, DLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by DLALSA. + + pure subroutine stdlib_qlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(out) :: bx(ldbx,*), work(*) + real(qp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, lf, ll, lvl, lvl2, nd, ndb1, ndiml, ndimr, & + nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n DLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_qlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: b(ldb,*), d(*), e(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + iwk, j, k, nlvl, nm1, nsize, nsub, nwork, perm, poles, s, sizei, smlszp, sqre, st, st1,& + u, vt, z + real(qp) :: cs, eps, orgnrm, r, rcnd, sn, tol + ! Intrinsic Functions + intrinsic :: abs,real,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_qlaset( 'A', 1, nrhs, zero, zero, b, ldb ) + else + rank = 1 + call stdlib_qlascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_qrot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + work( i*2-1 ) = cs + work( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = work( j*2-1 ) + sn = work( j*2 ) + call stdlib_qrot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_qlanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_qlaset( 'A', n, nrhs, zero, zero, b, ldb ) + return + end if + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + nwork = 1 + n*n + call stdlib_qlaset( 'A', n, n, zero, one, work, n ) + call stdlib_qlasdq( 'U', 0, n, n, 0, nrhs, d, e, work, n, work, n, b,ldb, work( & + nwork ), info ) + if( info/=0 ) then + return + end if + tol = rcnd*abs( d( stdlib_iqamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_qlaset( 'A', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + else + call stdlib_qlascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, work, n, b, ldb, zero,work( nwork ), & + n ) + call stdlib_qlacpy( 'A', n, nrhs, work( nwork ), n, b, ldb ) + ! unscale. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_qlasrt( 'D', n, d, info ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=qp) / real( smlsiz+1,KIND=qp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + bx = givnum + 2*nlvl*n + nwork = bx + n*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_qcopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_qcopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_qlasdq. + call stdlib_qlaset( 'A', nsize, nsize, zero, one,work( vt+st1 ), n ) + call stdlib_qlasdq( 'U', 0, nsize, nsize, 0, nrhs, d( st ),e( st ), work( vt+& + st1 ), n, work( nwork ),n, b( st, 1 ), ldb, work( nwork ), info ) + if( info/=0 ) then + return + end if + call stdlib_qlacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_qlasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), work( u+st1 & + ), n, work( vt+st1 ),iwork( k+st1 ), work( difl+st1 ),work( difr+st1 ), work( & + z+st1 ),work( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), n, iwork( & + perm+st1 ),work( givnum+st1 ), work( c+st1 ),work( s+st1 ), work( nwork ), & + iwork( iwk ),info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_qlalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( & + difr+st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+& + st1 ), work( nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_60 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_iqamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_qlaset( 'A', 1, nrhs, zero, zero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_qlascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_qcopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + call stdlib_qgemm( 'T', 'N', nsize, nrhs, nsize, one,work( vt+st1 ), n, work( & + bxst ), n, zero,b( st, 1 ), ldb ) + else + call stdlib_qlalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( difr+& + st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( givcol+st1 ),& + n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+st1 ), work( & + nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do + ! unscale and sort the singular values. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_qlasrt( 'D', n, d, info ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_qlalsd + + !> DLAMCH: determines quad precision machine parameters. + + pure real(qp) function stdlib_qlamch( cmach ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: cmach + ! ===================================================================== + + ! Local Scalars + real(qp) :: rnd, eps, sfmin, small, rmach + ! Intrinsic Functions + intrinsic :: digits,epsilon,huge,maxexponent,minexponent,radix,tiny + ! Executable Statements + ! assume rounding, not chopping. always. + rnd = one + if( one==rnd ) then + eps = epsilon(zero) * 0.5 + else + eps = epsilon(zero) + end if + if( stdlib_lsame( cmach, 'E' ) ) then + rmach = eps + else if( stdlib_lsame( cmach, 'S' ) ) then + sfmin = tiny(zero) + small = one / huge(zero) + if( small>=sfmin ) then + ! use small plus a bit, to avoid the possibility of rounding + ! causing overflow when computing 1/sfmin. + sfmin = small*( one+eps ) + end if + rmach = sfmin + else if( stdlib_lsame( cmach, 'B' ) ) then + rmach = radix(zero) + else if( stdlib_lsame( cmach, 'P' ) ) then + rmach = eps * radix(zero) + else if( stdlib_lsame( cmach, 'N' ) ) then + rmach = digits(zero) + else if( stdlib_lsame( cmach, 'R' ) ) then + rmach = rnd + else if( stdlib_lsame( cmach, 'M' ) ) then + rmach = minexponent(zero) + else if( stdlib_lsame( cmach, 'U' ) ) then + rmach = tiny(zero) + else if( stdlib_lsame( cmach, 'L' ) ) then + rmach = maxexponent(zero) + else if( stdlib_lsame( cmach, 'O' ) ) then + rmach = huge(zero) + else + rmach = zero + end if + stdlib_qlamch = rmach + return + end function stdlib_qlamch + + + pure real(qp) function stdlib_qlamc3( a, b ) + ! -- lapack auxiliary routine -- + ! univ. of tennessee, univ. of california berkeley and nag ltd.. + ! Scalar Arguments + real(qp), intent(in) :: a, b + ! ===================================================================== + ! Executable Statements + stdlib_qlamc3 = a + b + return + end function stdlib_qlamc3 + + !> DLAMRG: will create a permutation list which will merge the elements + !> of A (which is composed of two independently sorted sets) into a + !> single set which is sorted in ascending order. + + pure subroutine stdlib_qlamrg( n1, n2, a, dtrd1, dtrd2, index ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dtrd1, dtrd2, n1, n2 + ! Array Arguments + integer(ilp), intent(out) :: index(*) + real(qp), intent(in) :: a(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ind1, ind2, n1sv, n2sv + ! Executable Statements + n1sv = n1 + n2sv = n2 + if( dtrd1>0 ) then + ind1 = 1 + else + ind1 = n1 + end if + if( dtrd2>0 ) then + ind2 = 1 + n1 + else + ind2 = n1 + n2 + end if + i = 1 + ! while ( (n1sv > 0) + 10 continue + if( n1sv>0 .and. n2sv>0 ) then + if( a( ind1 )<=a( ind2 ) ) then + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + dtrd1 + n1sv = n1sv - 1 + else + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + dtrd2 + n2sv = n2sv - 1 + end if + go to 10 + end if + ! end while + if( n1sv==0 ) then + do n1sv = 1, n2sv + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + dtrd2 + end do + else + ! n2sv == 0 + do n2sv = 1, n1sv + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + dtrd1 + end do + end if + return + end subroutine stdlib_qlamrg + + !> DLAMSWLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (DLASWLQ) + + pure subroutine stdlib_qlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(qp), intent(in) :: a(lda,*), t(ldt,*) + real(qp), intent(out) :: work(*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, ctr, lw + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_qgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_qtpmlqt('L','T',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_qtpmlqt('L','T',nb-k , n, k, 0,mb, a(1,i), lda,t(1, ctr*k+1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_qgemlqt('L','T',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_qgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_qtpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_qtpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_qtpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1,ctr *k+1), ldt, & + c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_qtpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1,ctr*k+1), ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_qgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ctr = 1 + ii=n-kk+1 + call stdlib_qgemlqt('R','T',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_qtpmlqt('R','T',m , nb-k, k, 0,mb, a(1,i), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_qtpmlqt('R','T',m , kk, k, 0,mb, a(1,ii), lda,t(1,ctr*k+1),ldt, c(1,1),& + ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_qlamswlq + + !> DLAMTSQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (DLATSQR) + + pure subroutine stdlib_qlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(qp), intent(in) :: a(lda,*), t(ldt,*) + real(qp), intent(out) :: work(*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = mb * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_qgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_qtpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1,ctr*k+1),ldt , c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_qtpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_qgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_qgemqrt('L','T',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_qtpmqrt('L','T',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_qtpmqrt('L','T',kk , n, k, 0,nb, a(ii,1), lda,t(1,ctr * k + 1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_qtpmqrt('R','T',m , kk, k, 0, nb, a(ii,1), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_qtpmqrt('R','T',m , mb-k, k, 0,nb, a(i,1), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_qgemqrt('R','T',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_qgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_qtpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_qtpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_qlamtsqr + + !> DLANEG: computes the Sturm count, the number of negative pivots + !> encountered while factoring tridiagonal T - sigma I = L D L^T. + !> This implementation works directly on the factors without forming + !> the tridiagonal matrix T. The Sturm count is also the number of + !> eigenvalues of T less than sigma. + !> This routine is called from DLARRB. + !> The current routine does not use the PIVMIN parameter but rather + !> requires IEEE-754 propagation of Infinities and NaNs. This + !> routine also has no input range restrictions but does require + !> default exception handling such that x/0 produces Inf when x is + !> non-zero, and Inf/Inf produces NaN. For more information, see: + !> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in + !> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on + !> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 + !> (Tech report version in LAWN 172 with the same title.) + + pure integer(ilp) function stdlib_qlaneg( n, d, lld, sigma, pivmin, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, r + real(qp), intent(in) :: pivmin, sigma + ! Array Arguments + real(qp), intent(in) :: d(*), lld(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blklen = 128 + + ! some architectures propagate infinities and nans very slowly, so + ! the code computes counts in blklen chunks. then a nan can + ! propagate at most blklen columns before being detected. this is + ! not a general tuning parameter; it needs only to be just large + ! enough that the overhead is tiny in common cases. + + ! Local Scalars + integer(ilp) :: bj, j, neg1, neg2, negcnt + real(qp) :: bsav, dminus, dplus, gamma, p, t, tmp + logical(lk) :: sawnan + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + negcnt = 0 + ! i) upper part: l d l^t - sigma i = l+ d+ l+^t + t = -sigma + loop_210: do bj = 1, r-1, blklen + neg1 = 0 + bsav = t + do j = bj, min(bj+blklen-1, r-1) + dplus = d( j ) + t + if( dplus DLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(qp) function stdlib_qlangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(qp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value DLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real matrix A. + + real(qp) function stdlib_qlange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value DLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real tridiagonal matrix A. + + pure real(qp) function stdlib_qlangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_qlassq( n-1, dl, 1, scale, sum ) + call stdlib_qlassq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_qlangt = anorm + return + end function stdlib_qlangt + + !> DLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(qp) function stdlib_qlanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_qlassq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_qlanhs = value + return + end function stdlib_qlanhs + + !> DLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(qp) function stdlib_qlansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_qlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_qlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_qlassq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_qlansb = value + return + end function stdlib_qlansb + + !> DLANSF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A in RFP format. + + real(qp) function stdlib_qlansf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: a(0:*) + real(qp), intent(out) :: work(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(qp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_qlansf = zero + return + else if( n==1 ) then + stdlib_qlansf = abs( a(0) ) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='t or 't' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'T' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + do j = 0, k - 1 + do i = 0, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n + do j = 0, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + do j = 0, k - 1 + do i = 0, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n+1 + do j = 0, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + if( ifm==1 ) then + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( a( 0+j*lda ) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( a( i+j*lda ) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( a( i+j*lda ) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( a( 0+j*lda ) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( a( i+j*lda ) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( a( 0 ) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_qlassq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_qlassq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k-1, a( k ), lda+1, scale, s ) + ! tri l at a(k,0) + call stdlib_qlassq( k, a( k-1 ), lda+1, scale, s ) + ! tri u at a(k-1,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_qlassq( n-j-1, a( j+1+j*lda ), 1, scale, s ) + ! trap l at a(0,0) + end do + do j = 0, k - 2 + call stdlib_qlassq( j, a( 0+( 1+j )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( 0 ), lda+1, scale, s ) + ! tri l at a(0,0) + call stdlib_qlassq( k-1, a( 0+lda ), lda+1, scale, s ) + ! tri u at a(0,1) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 2 + call stdlib_qlassq( j, a( 0+( k+j )*lda ), 1, scale, s ) + ! u at a(0,k) + end do + do j = 0, k - 2 + call stdlib_qlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_qlassq( k-j-1, a( j+1+( j+k-1 )*lda ), 1,scale, s ) + ! l at a(0,k-1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k-1, a( 0+k*lda ), lda+1, scale, s ) + ! tri u at a(0,k) + call stdlib_qlassq( k, a( 0+( k-1 )*lda ), lda+1, scale, s ) + ! tri l at a(0,k-1) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_qlassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + do j = k, n - 1 + call stdlib_qlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,k) + end do + do j = 0, k - 3 + call stdlib_qlassq( k-j-2, a( j+2+j*lda ), 1, scale, s ) + ! l at a(1,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + call stdlib_qlassq( k-1, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 2 + call stdlib_qlassq( k-j-1, a( k+j+2+j*lda ), 1, scale, s ) + ! l at a(k+1,0) + end do + do j = 0, k - 1 + call stdlib_qlassq( k+j, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( k+1 ), lda+1, scale, s ) + ! tri l at a(k+1,0) + call stdlib_qlassq( k, a( k ), lda+1, scale, s ) + ! tri u at a(k,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_qlassq( n-j-1, a( j+2+j*lda ), 1, scale, s ) + ! trap l at a(1,0) + end do + do j = 1, k - 1 + call stdlib_qlassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + call stdlib_qlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 1 + call stdlib_qlassq( j, a( 0+( k+1+j )*lda ), 1, scale, s ) + ! u at a(0,k+1) + end do + do j = 0, k - 1 + call stdlib_qlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_qlassq( k-j-1, a( j+1+( j+k )*lda ), 1, scale,s ) + ! l at a(0,k) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( 0+( k+1 )*lda ), lda+1, scale, s ) + ! tri u at a(0,k+1) + call stdlib_qlassq( k, a( 0+k*lda ), lda+1, scale, s ) + ! tri l at a(0,k) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_qlassq( j, a( 0+( j+1 )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + do j = k + 1, n + call stdlib_qlassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,k+1) + end do + do j = 0, k - 2 + call stdlib_qlassq( k-j-1, a( j+1+j*lda ), 1, scale, s ) + ! l at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_qlassq( k, a( lda ), lda+1, scale, s ) + ! tri l at a(0,1) + call stdlib_qlassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + end if + end if + value = scale*sqrt( s ) + end if + stdlib_qlansf = value + return + end function stdlib_qlansf + + !> DLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A, supplied in packed form. + + real(qp) function stdlib_qlansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: ap(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_qlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_qlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( ap( k )/=zero ) then + absa = abs( ap( k ) ) + if( scale DLANST: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric tridiagonal matrix A. + + pure real(qp) function stdlib_qlanst( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_qlassq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_qlassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_qlanst = anorm + return + end function stdlib_qlanst + + !> DLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A. + + real(qp) function stdlib_qlansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_qlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_qlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_qlassq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_qlansy = value + return + end function stdlib_qlansy + + !> DLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(qp) function stdlib_qlantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_qlassq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_qlassq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_qlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_qlassq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_qlantb = value + return + end function stdlib_qlantb + + !> DLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(qp) function stdlib_qlantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: ap(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_qlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_qlassq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_qlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_qlassq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_qlantp = value + return + end function stdlib_qlantp + + !> DLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(qp) function stdlib_qlantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_qlassq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_qlassq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_qlassq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_qlassq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_qlantr = value + return + end function stdlib_qlantr + + !> DLANV2: computes the Schur factorization of a real 2-by-2 nonsymmetric + !> matrix in standard form: + !> [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + !> [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + !> where either + !> 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + !> 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + !> conjugate eigenvalues. + + pure subroutine stdlib_qlanv2( a, b, c, d, rt1r, rt1i, rt2r, rt2i, cs, sn ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(inout) :: a, b, c, d + real(qp), intent(out) :: cs, rt1i, rt1r, rt2i, rt2r, sn + ! ===================================================================== + ! Parameters + real(qp), parameter :: multpl = 4.0e+0_qp + + + ! Local Scalars + real(qp) :: aa, bb, bcmax, bcmis, cc, cs1, dd, eps, p, sab, sac, scale, sigma, sn1, & + tau, temp, z, safmin, safmn2, safmx2 + integer(ilp) :: count + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + safmin = stdlib_qlamch( 'S' ) + eps = stdlib_qlamch( 'P' ) + safmn2 = stdlib_qlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_qlamch( 'B' ) ) / & + two,KIND=ilp) + safmx2 = one / safmn2 + if( c==zero ) then + cs = one + sn = zero + else if( b==zero ) then + ! swap rows and columns + cs = zero + sn = one + temp = d + d = a + a = temp + b = -c + c = zero + else if( ( a-d )==zero .and. sign( one, b )/=sign( one, c ) )then + cs = one + sn = zero + else + temp = a - d + p = half*temp + bcmax = max( abs( b ), abs( c ) ) + bcmis = min( abs( b ), abs( c ) )*sign( one, b )*sign( one, c ) + scale = max( abs( p ), bcmax ) + z = ( p / scale )*p + ( bcmax / scale )*bcmis + ! if z is of the order of the machine accuracy, postpone the + ! decision on the nature of eigenvalues + if( z>=multpl*eps ) then + ! real eigenvalues. compute a and d. + z = p + sign( sqrt( scale )*sqrt( z ), p ) + a = d + z + d = d - ( bcmax / z )*bcmis + ! compute b and the rotation matrix + tau = stdlib_qlapy2( c, z ) + cs = z / tau + sn = c / tau + b = b - c + c = zero + else + ! complex eigenvalues, or real(almost,KIND=qp) equal eigenvalues. + ! make diagonal elements equal. + count = 0 + sigma = b + c + 10 continue + count = count + 1 + scale = max( abs(temp), abs(sigma) ) + if( scale>=safmx2 ) then + sigma = sigma * safmn2 + temp = temp * safmn2 + if (count <= 20)goto 10 + end if + if( scale<=safmn2 ) then + sigma = sigma * safmx2 + temp = temp * safmx2 + if (count <= 20)goto 10 + end if + p = half*temp + tau = stdlib_qlapy2( sigma, temp ) + cs = sqrt( half*( one+abs( sigma ) / tau ) ) + sn = -( p / ( tau*cs ) )*sign( one, sigma ) + ! compute [ aa bb ] = [ a b ] [ cs -sn ] + ! [ cc dd ] [ c d ] [ sn cs ] + aa = a*cs + b*sn + bb = -a*sn + b*cs + cc = c*cs + d*sn + dd = -c*sn + d*cs + ! compute [ a b ] = [ cs sn ] [ aa bb ] + ! [ c d ] [-sn cs ] [ cc dd ] + a = aa*cs + cc*sn + b = bb*cs + dd*sn + c = -aa*sn + cc*cs + d = -bb*sn + dd*cs + temp = half*( a+d ) + a = temp + d = temp + if( c/=zero ) then + if( b/=zero ) then + if( sign( one, b )==sign( one, c ) ) then + ! real eigenvalues: reduce to upper triangular form + sab = sqrt( abs( b ) ) + sac = sqrt( abs( c ) ) + p = sign( sab*sac, c ) + tau = one / sqrt( abs( b+c ) ) + a = temp + p + d = temp - p + b = b - c + c = zero + cs1 = sab*tau + sn1 = sac*tau + temp = cs*cs1 - sn*sn1 + sn = cs*sn1 + sn*cs1 + cs = temp + end if + else + b = -c + c = zero + temp = cs + cs = -sn + sn = temp + end if + end if + end if + end if + ! store eigenvalues in (rt1r,rt1i) and (rt2r,rt2i). + rt1r = a + rt2r = d + if( c==zero ) then + rt1i = zero + rt2i = zero + else + rt1i = sqrt( abs( b ) )*sqrt( abs( c ) ) + rt2i = -rt1i + end if + return + end subroutine stdlib_qlanv2 + + !> DLAORHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine DLAORHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_qlaorhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_qlaorhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_qlaorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_qtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_qlaorhr_col_getrfnp + + !> DLAORHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine DORHR_COL. In DORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine DLAORHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2 + !> is self-sufficient and can be used without DLAORHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_qlaorhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: abs,sign,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_qscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_qlaorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_qtrsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_qtrsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_qgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_qlaorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_qlaorhr_col_getrfnp2 + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_qlapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(qp), intent(out) :: ssmin + ! Array Arguments + real(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: a11, a12, a22, c, ssmax, tau + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_qlarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = one + c = -tau*stdlib_qdot( n, x, incx, y, incy ) + call stdlib_qaxpy( n, c, x, incx, y, incy ) + call stdlib_qlarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_qlas2( a11, a12, a22, ssmin, ssmax ) + return + end subroutine stdlib_qlapll + + !> DLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_qlapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + real(qp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_qlapmr + + !> DLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_qlapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, in, j + real(qp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_qlapmt + + !> DLAPY2: returns sqrt(x**2+y**2), taking care not to cause unnecessary + !> overflow and unnecessary underflow. + + pure real(qp) function stdlib_qlapy2( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: x, y + ! ===================================================================== + + + ! Local Scalars + real(qp) :: w, xabs, yabs, z, hugeval + logical(lk) :: x_is_nan, y_is_nan + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + x_is_nan = stdlib_qisnan( x ) + y_is_nan = stdlib_qisnan( y ) + if ( x_is_nan ) stdlib_qlapy2 = x + if ( y_is_nan ) stdlib_qlapy2 = y + hugeval = stdlib_qlamch( 'OVERFLOW' ) + if ( .not.( x_is_nan.or.y_is_nan ) ) then + xabs = abs( x ) + yabs = abs( y ) + w = max( xabs, yabs ) + z = min( xabs, yabs ) + if( z==zero .or. w>hugeval ) then + stdlib_qlapy2 = w + else + stdlib_qlapy2 = w*sqrt( one+( z / w )**2 ) + end if + end if + return + end function stdlib_qlapy2 + + !> DLAPY3: returns sqrt(x**2+y**2+z**2), taking care not to cause + !> unnecessary overflow and unnecessary underflow. + + pure real(qp) function stdlib_qlapy3( x, y, z ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: x, y, z + ! ===================================================================== + + ! Local Scalars + real(qp) :: w, xabs, yabs, zabs, hugeval + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + hugeval = stdlib_qlamch( 'OVERFLOW' ) + xabs = abs( x ) + yabs = abs( y ) + zabs = abs( z ) + w = max( xabs, yabs, zabs ) + if( w==zero .or. w>hugeval ) then + ! w can be zero for max(0,nan,0) + ! adding all three entries together will make sure + ! nan will not disappear. + stdlib_qlapy3 = xabs + yabs + zabs + else + stdlib_qlapy3 = w*sqrt( ( xabs / w )**2+( yabs / w )**2+( zabs / w )**2 ) + end if + return + end function stdlib_qlapy3 + + !> DLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_qlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(inout) :: ab(ldab,*) + real(qp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_qlaqgb + + !> DLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_qlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_qlaqge + + !> DLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_qlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: a(lda,*), vn1(*), vn2(*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(qp) :: aii, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_qlamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_iqamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_qswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi DLAQPS: computes a step of QR factorization with column pivoting + !> of a real M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_qlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*) + real(qp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(qp) :: akk, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,real,max,min,nint,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_qlamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + call stdlib_qgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1 ),lda, f( k, 1 ), & + ldf, one, a( rk, k ), 1 ) + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_qgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1 ),lda, a( rk, k & + ), 1, zero, auxv( 1 ), 1 ) + call stdlib_qgemv( 'NO TRANSPOSE', n, k-1, one, f( 1, 1 ), ldf,auxv( 1 ), 1, one,& + f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_qnrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_dnrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_qlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 40 + end if + return + end subroutine stdlib_qlaqps + + !> DLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_qlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(qp), parameter :: wilk1 = 0.75_qp + real(qp), parameter :: wilk2 = -0.4375_qp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_qlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(qp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(qp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,mod + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_qlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_qlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_qlaqr3 ==== + call stdlib_qlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_qlaqr5, stdlib_qlaqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=qp) + return + end if + ! ==== stdlib_qlahqr/stdlib_qlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_qlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_qlaqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_qlaqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_qlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_qlaqr4 or + ! . stdlib_qlahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_qlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_qlaqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, work,lwork, inf ) + else + call stdlib_qlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_qlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=qp) + end subroutine stdlib_qlaqr0 + + !> Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + !> scaling to avoid overflows and most underflows. It + !> is assumed that either + !> 1) sr1 = sr2 and si1 = -si2 + !> or + !> 2) si1 = si2 = 0. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_qlaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: si1, si2, sr1, sr2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + real(qp), intent(in) :: h(ldh,*) + real(qp), intent(out) :: v(*) + ! ================================================================ + + ! Local Scalars + real(qp) :: h21s, h31s, s + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) - si1*( & + si2 / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) + end if + else + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) +abs( h( 3, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) -si1*( si2 / s ) + h( 1, 2 )& + *h21s + h( 1, 3 )*h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) +h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-sr1-sr2 ) +h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_qlaqr1 + + !> DLAQR2: is identical to DLAQR3 except that it avoids + !> recursion by calling DLAHQR instead of DLAQR4. + !> Aggressive early deflation: + !> This subroutine accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_qlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(qp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwkopt + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_qgehrd ==== + call stdlib_qgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_qormhr ==== + call stdlib_qormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=qp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_qlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_qcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_qlaset( 'A', jw, jw, zero, one, v, ldv ) + call stdlib_qlahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, jw, & + v, ldv, infqr ) + ! ==== stdlib_qtrexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not.bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_qtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_qtrexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_qlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_qcopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_qlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_qlaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_qlarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_qlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_qlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_qgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_qlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_qcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_qormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_qgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_qlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_qgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_qlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_qgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_qlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=qp) + end subroutine stdlib_qlaqr2 + + !> Aggressive early deflation: + !> DLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_qlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(qp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwk3, lwkopt, nmin + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_qgehrd ==== + call stdlib_qgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_qormhr ==== + call stdlib_qormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_qlaqr4 ==== + call stdlib_qlaqr4( .true., .true., jw, 1, jw, t, ldt, sr, si, 1, jw,v, ldv, work, -& + 1, infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=qp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_qlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_qcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_qlaset( 'A', jw, jw, zero, one, v, ldv ) + nmin = stdlib_ilaenv( 12, 'DLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_qlaqr4( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, work, lwork, infqr ) + else + call stdlib_qlahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, infqr ) + end if + ! ==== stdlib_qtrexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not. bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_qtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_qtrexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_qtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_qlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_qcopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_qlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_qlaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_qlarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_qlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_qlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_qgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_qlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_qcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_qormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_qgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_qlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_qgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_qlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_qgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_qlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=qp) + end subroutine stdlib_qlaqr3 + + !> DLAQR4: implements one level of recursion for DLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by DLAQR0 and, for large enough + !> deflation window size, it may be called by DLAQR3. This + !> subroutine is identical to DLAQR0 except that it calls DLAQR2 + !> instead of DLAQR3. + !> DLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_qlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), z(ldz,*) + real(qp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(qp), parameter :: wilk1 = 0.75_qp + real(qp), parameter :: wilk2 = -0.4375_qp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_qlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(qp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(qp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,int,max,min,mod + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_qlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_qlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_qlaqr2 ==== + call stdlib_qlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_qlaqr5, stdlib_qlaqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=qp) + return + end if + ! ==== stdlib_qlahqr/stdlib_qlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_qlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_qlaqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_qlaqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_qlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_qlahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_qlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_qlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( ks & + ), wi( ks ),1, 1, zdum, 1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_qlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=qp) + end subroutine stdlib_qlaqr4 + + !> DLAQR5:, called by DLAQR0, performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_qlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & + iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(qp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) + real(qp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(qp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& + tst1, tst2, ulp + integer(ilp) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & + krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,real,max,min,mod + ! Local Arrays + real(qp) :: vt(3) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== shuffle shifts into pairs of real shifts and pairs + ! . of complex conjugate shifts assuming complex + ! . conjugate shifts are already adjacent to one + ! . another. ==== + do i = 1, nshfts - 2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + end if + end do + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by one. the shuffle above + ! . ensures that the dropped shift is real and that + ! . the remaining shifts are paired. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = zero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_qlaset( 'ALL', kdu, kdu, zero, one, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop ) then + if( h( k+1, k )/=zero ) then + tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) + if( tst1==zero ) then + if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then + h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_qlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),v( 1, m ) ) + alpha = v( 1, m ) + call stdlib_qlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually zero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*v( 2, m ) + h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m ) + ! ==== calculate reflection to move + ! . mth bulge one step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_qlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & + then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new one. ==== + call stdlib_qlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),vt ) + alpha = vt( 1 ) + call stdlib_qlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*h( k+2, k ) ) + if( abs( h( k+2, k )-refsum*vt( 2 ) )+abs( refsum*vt( 3 ) )>ulp*( abs( & + h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old one with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new one. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = zero + h( k+3, k ) = zero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m ) + h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*h( k+2, k+1 )+v( 3, m )*h( k+3, & + k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is zero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*h( k+2, j )+v( 3, m )*h( k+3, j & + ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m ) + u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m ) + z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_qgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & + jcol ), ldh, zero, wh,ldwh ) + call stdlib_qlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_qgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_qlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_qgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_qlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_qlaqr5 + + !> DLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_qlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(inout) :: ab(ldab,*) + real(qp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_qlaqsb + + !> DLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_qlaqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(inout) :: ap(*) + real(qp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_qlaqsp + + !> DLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_qlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_qlaqsy + + !> DLAQTR: solves the real quasi-triangular system + !> op(T)*p = scale*c, if LREAL = .TRUE. + !> or the complex quasi-triangular systems + !> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. + !> in real arithmetic, where T is upper quasi-triangular. + !> If LREAL = .FALSE., then the first diagonal block of T must be + !> 1 by 1, B is the specially structured matrix + !> B = [ b(1) b(2) ... b(n) ] + !> [ w ] + !> [ w ] + !> [ . ] + !> [ w ] + !> op(A) = A or A**T, A**T denotes the transpose of + !> matrix A. + !> On input, X = [ c ]. On output, X = [ p ]. + !> [ d ] [ q ] + !> This subroutine is designed for the condition number estimation + !> in routine DTRSNA. + + subroutine stdlib_qlaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: lreal, ltran + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldt, n + real(qp), intent(out) :: scale + real(qp), intent(in) :: w + ! Array Arguments + real(qp), intent(in) :: b(*), t(ldt,*) + real(qp), intent(out) :: work(*) + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, j1, j2, jnext, k, n1, n2 + real(qp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & + xnorm, z + ! Local Arrays + real(qp) :: d(2,2), v(2,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! do not test the input parameters for errors + notran = .not.ltran + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + bignum = one / smlnum + xnorm = stdlib_qlange( 'M', n, n, t, ldt, d ) + if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib_qlange( 'M', n, 1, b, n, d ) ) + + smin = max( smlnum, eps*xnorm ) + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = stdlib_qasum( j-1, t( 1, j ), 1 ) + end do + if( .not.lreal ) then + do i = 2, n + work( i ) = work( i ) + abs( b( i ) ) + end do + end if + n2 = 2*n + n1 = n + if( .not.lreal )n1 = n2 + k = stdlib_iqamax( n1, x, 1 ) + xmax = abs( x( k ) ) + scale = one + if( xmax>bignum ) then + scale = bignum / xmax + call stdlib_qscal( n1, scale, x, 1 ) + xmax = bignum + end if + if( lreal ) then + if( notran ) then + ! solve t*p = scale*c + jnext = n + loop_30: do j = n, 1, -1 + if( j>jnext )cycle loop_30 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! meet 1 by 1 diagonal block + ! scale to avoid overflow when computing + ! x(j) = b(j)/t(j,j) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xj = abs( x( j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_qaxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + k = stdlib_iqamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + else + ! meet 2 by 2 diagonal block + ! call 2 by 2 linear system solve, to take + ! care of possible overflow by scaling factor. + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + call stdlib_qlaln2( .false., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_qscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) + ! to avoid overflow in updating right-hand side. + xj = max( abs( v( 1, 1 ) ), abs( v( 2, 1 ) ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update right-hand side + if( j1>1 ) then + call stdlib_qaxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_qaxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + k = stdlib_iqamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + end if + end do loop_30 + else + ! solve t**t*p = scale*c + jnext = 1 + loop_40: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1, x, 1 ) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xmax = max( xmax, abs( x( j1 ) ) ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side elements by inner product. + xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) + if( xmax>one ) then + rec = one / xmax + if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then + call stdlib_qscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_qdot( j1-1, t( 1, j2 ), 1, x,1 ) + call stdlib_qlaln2( .true., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d, & + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_qscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) + end if + end do loop_40 + end if + else + sminw = max( eps*abs( w ), smin ) + if( notran ) then + ! solve (t + ib)*(p+iq) = c+id + jnext = n + loop_70: do j = n, 1, -1 + if( j>jnext )cycle loop_70 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! 1 by 1 diagonal block + ! scale if necessary to avoid overflow in division + z = w + if( j1==1 )z = b( 1 ) + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_qladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) + x( j1 ) = sr + x( n+j1 ) = si + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_qaxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_qaxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) + end do + end if + else + ! meet 2 by 2 diagonal block + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + d( 1, 2 ) = x( n+j1 ) + d( 2, 2 ) = x( n+j2 ) + call stdlib_qlaln2( .false., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, & + d, 2, zero, -w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_qscal( 2*n, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + ! scale x(j1), .... to avoid overflow in + ! updating right hand side. + xj = max( abs( v( 1, 1 ) )+abs( v( 1, 2 ) ),abs( v( 2, 1 ) )+abs( v( 2, 2 )& + ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update the right-hand side. + if( j1>1 ) then + call stdlib_qaxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_qaxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + call stdlib_qaxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + call stdlib_qaxpy( j1-1, -x( n+j2 ), t( 1, j2 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) + end do + end if + end if + end do loop_70 + else + ! solve (t + ib)**t*(p+iq) = c+id + jnext = 1 + loop_80: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1, x, 1 ) + x( n+j1 ) = x( n+j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + if( j1>1 ) then + x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) + x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1 ) + end if + xj = abs( x( j1 ) ) + abs( x( j1+n ) ) + z = w + if( j1==1 )z = b( 1 ) + ! scale if necessary to avoid overflow in + ! complex division + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_qladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) + x( j1 ) = sr + x( j1+n ) = si + xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side element by inner product. + xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) + + if( xmax>one ) then + rec = one / xmax + if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then + call stdlib_qscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_qdot( j1-1, t( 1, j2 ), 1, x,1 ) + d( 1, 2 ) = x( n+j1 ) - stdlib_qdot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + d( 2, 2 ) = x( n+j2 ) - stdlib_qdot( j1-1, t( 1, j2 ), 1,x( n+1 ), 1 ) + + d( 1, 1 ) = d( 1, 1 ) - b( j1 )*x( n+1 ) + d( 2, 1 ) = d( 2, 1 ) - b( j2 )*x( n+1 ) + d( 1, 2 ) = d( 1, 2 ) + b( j1 )*x( 1 ) + d( 2, 2 ) = d( 2, 2 ) + b( j2 )*x( 1 ) + call stdlib_qlaln2( .true., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_qscal( n2, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& + , xmax ) + end if + end do loop_80 + end if + end if + return + end subroutine stdlib_qlaqtr + + !> DLAQZ0: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by DGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_qlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & + alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + real(qp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& + * ),alphai( * ), beta( * ), work( * ) + + ! local scalars + real(qp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost, i + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_qhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& + beta, q, ldq, z, ldz, work,lwork, info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_qlaqz3 + nw = max( nwr, nmin ) + call stdlib_qlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_qeflated, alphar,alphai, beta, work, nw, work, nw, work, -1, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_qlaqz4 + call stdlib_qlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & + lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=qp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_qlaset( 'FULL', n, n, zero, one, q, ldq ) + if( iwantz==3 ) call stdlib_qlaset( 'FULL', n, n, zero, one, z, ldz ) + ! get machine constants + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp)/ulp ) + istart = ilo + istop = ihi + maxit = 3*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& + abs( a( istop-2,istop-2 ) ) ) ) ) then + a( istop-1, istop-2 ) = zero + istop = istop-2 + ld = 0 + eshift = zero + else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& + abs( a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = zero + istop = istop-1 + ld = 0 + eshift = zero + end if + ! check deflations at the start + if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & + ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then + a( istart+2, istart+1 ) = zero + istart = istart+2 + ld = 0 + eshift = zero + else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& + )+abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = zero + istart = istart+1 + ld = 0 + eshift = zero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = zero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + temp = zero + if( k < istop ) then + temp = temp+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + temp = temp+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_qlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = zero + call stdlib_qrot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_qrot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_qrot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = zero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using stdlib_qhgeqz because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_qlaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_qeflated,alphar, alphai, beta, work, nw, work( nw**2+1 ),& + nw, work( 2*nw**2+1 ), lwork-2*nw**2, rec,aed_info ) + if ( n_qeflated > 0 ) then + istop = istop-n_qeflated + ld = 0 + eshift = zero + end if + if ( 100*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_qeflated-n_undeflated+1 + ! shuffle shifts to put double shifts in front + ! this ensures that we don't split up a double shift + do i = shiftpos, shiftpos+n_undeflated-1, 2 + if( alphai( i )/=-alphai( i+1 ) ) then + swap = alphar( i ) + alphar( i ) = alphar( i+1 ) + alphar( i+1 ) = alphar( i+2 ) + alphar( i+2 ) = swap + swap = alphai( i ) + alphai( i ) = alphai( i+1 ) + alphai( i+1 ) = alphai( i+2 ) + alphai( i+2 ) = swap + swap = beta( i ) + beta( i ) = beta( i+1 ) + beta( i+1 ) = beta( i+2 ) + beta( i+2 ) = swap + end if + end do + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=qp)*safmin )*abs( a( istop,istop-1 ) ) Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). + !> It is assumed that either + !> 1) sr1 = sr2 + !> or + !> 2) si = 0. + !> This is useful for starting double implicit shift bulges + !> in the QZ algorithm. + + pure subroutine stdlib_qlaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) + ! arguments + integer(ilp), intent( in ) :: lda, ldb + real(qp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2 + real(qp), intent( out ) :: v( * ) + + ! local scalars + real(qp) :: w(2), safmin, safmax, scale1, scale2 + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + ! calculate first shifted vector + w( 1 ) = beta1*a( 1, 1 )-sr1*b( 1, 1 ) + w( 2 ) = beta1*a( 2, 1 )-sr1*b( 2, 1 ) + scale1 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale1 >= safmin .and. scale1 <= safmax ) then + w( 1 ) = w( 1 )/scale1 + w( 2 ) = w( 2 )/scale1 + end if + ! solve linear system + w( 2 ) = w( 2 )/b( 2, 2 ) + w( 1 ) = ( w( 1 )-b( 1, 2 )*w( 2 ) )/b( 1, 1 ) + scale2 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale2 >= safmin .and. scale2 <= safmax ) then + w( 1 ) = w( 1 )/scale2 + w( 2 ) = w( 2 )/scale2 + end if + ! apply second shift + v( 1 ) = beta2*( a( 1, 1 )*w( 1 )+a( 1, 2 )*w( 2 ) )-sr2*( b( 1,1 )*w( 1 )+b( 1, 2 )*w(& + 2 ) ) + v( 2 ) = beta2*( a( 2, 1 )*w( 1 )+a( 2, 2 )*w( 2 ) )-sr2*( b( 2,1 )*w( 1 )+b( 2, 2 )*w(& + 2 ) ) + v( 3 ) = beta2*( a( 3, 1 )*w( 1 )+a( 3, 2 )*w( 2 ) )-sr2*( b( 3,1 )*w( 1 )+b( 3, 2 )*w(& + 2 ) ) + ! account for imaginary part + v( 1 ) = v( 1 )+si*si*b( 1, 1 )/scale1/scale2 + ! check for overflow + if( abs( v( 1 ) )>safmax .or. abs( v( 2 ) ) > safmax .or.abs( v( 3 ) )>safmax .or. & + stdlib_qisnan( v( 1 ) ) .or.stdlib_qisnan( v( 2 ) ) .or. stdlib_qisnan( v( 3 ) ) ) & + then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + end if + end subroutine stdlib_qlaqz1 + + !> DLAQZ2: chases a 2x2 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_qlaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + ! local variables + real(qp) :: h(2,3), c1, s1, c2, s2, temp + if( k+2 == ihi ) then + ! shift is located on the edge of the matrix, remove it + h = b( ihi-1:ihi, ihi-2:ihi ) + ! make h upper triangular + call stdlib_qlartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_qrot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + call stdlib_qlartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_qrot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_qlartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + call stdlib_qrot( ihi-istartm+1, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_qrot( ihi-istartm+1, b( istartm, ihi-1 ), 1, b( istartm,ihi-2 ), 1, c2, & + s2 ) + b( ihi-1, ihi-2 ) = zero + b( ihi, ihi-2 ) = zero + call stdlib_qrot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_qrot( ihi-istartm+1, a( istartm, ihi-1 ), 1, a( istartm,ihi-2 ), 1, c2, & + s2 ) + if ( ilz ) then + call stdlib_qrot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + call stdlib_qrot( nz, z( 1, ihi-1-zstart+1 ), 1, z( 1,ihi-2-zstart+1 ), 1, c2, & + s2 ) + end if + call stdlib_qlartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) + a( ihi-1, ihi-2 ) = temp + a( ihi, ihi-2 ) = zero + call stdlib_qrot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & + ) + call stdlib_qrot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & + ) + if ( ilq ) then + call stdlib_qrot( nq, q( 1, ihi-1-qstart+1 ), 1, q( 1, ihi-qstart+1 ), 1, c1, s1 & + ) + end if + call stdlib_qlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = zero + call stdlib_qrot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, s1 ) + + call stdlib_qrot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + if ( ilz ) then + call stdlib_qrot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + end if + else + ! normal operation, move bulge down + h = b( k+1:k+2, k:k+2 ) + ! make h upper triangular + call stdlib_qlartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_qrot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + ! calculate z1 and z2 + call stdlib_qlartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_qrot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_qlartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + ! apply transformations from the right + call stdlib_qrot( k+3-istartm+1, a( istartm, k+2 ), 1, a( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_qrot( k+3-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c2, s2 ) + + call stdlib_qrot( k+2-istartm+1, b( istartm, k+2 ), 1, b( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_qrot( k+2-istartm+1, b( istartm, k+1 ), 1, b( istartm,k ), 1, c2, s2 ) + + if ( ilz ) then + call stdlib_qrot( nz, z( 1, k+2-zstart+1 ), 1, z( 1, k+1-zstart+1 ), 1, c1, s1 ) + + call stdlib_qrot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c2, s2 ) + + end if + b( k+1, k ) = zero + b( k+2, k ) = zero + ! calculate q1 and q2 + call stdlib_qlartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) + a( k+2, k ) = temp + a( k+3, k ) = zero + call stdlib_qlartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) + a( k+1, k ) = temp + a( k+2, k ) = zero + ! apply transformations from the left + call stdlib_qrot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) + call stdlib_qrot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) + call stdlib_qrot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) + call stdlib_qrot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) + if ( ilq ) then + call stdlib_qrot( nq, q( 1, k+2-qstart+1 ), 1, q( 1, k+3-qstart+1 ), 1, c1, s1 ) + + call stdlib_qrot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c2, s2 ) + + end if + end if + end subroutine stdlib_qlaqz2 + + !> DLAQZ3: performs AED + + recursive subroutine stdlib_qlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + real(qp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& + * ),alphai( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + real(qp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + real(qp), intent(out) :: work(*) + + ! local scalars + logical(lk) :: bulge + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(qp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = zero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_qtgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & + work, -1, dtgexc_info ) + lworkreq = int( work( 1 ),KIND=ilp) + call stdlib_qlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1, rec+1, qz_small_info ) + lworkreq = max( lworkreq, int( work( 1 ),KIND=ilp)+2*jw**2 ) + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ3', -info ) + return + end if + ! get machine constants + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alphar( kwtop ) = a( kwtop, kwtop ) + alphai( kwtop ) = zero + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = zero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_qlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_qlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_qlaset( 'FULL', jw, jw, zero, one, qc, ldqc ) + call stdlib_qlaset( 'FULL', jw, jw, zero, one, zc, ldzc ) + call stdlib_qlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2*jw**2+1 ), lwork-2*jw**2,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_qlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_qlacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == zero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + bulge = .false. + if ( kwbot-kwtop+1 >= 2 ) then + bulge = a( kwbot, kwbot-1 ) /= zero + end if + if ( bulge ) then + ! try to deflate complex conjugate eigenvalue pair + temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & + a( kwbot-1, kwbot ) ) ) + if( temp == zero )then + temp = abs( s ) + end if + if ( max( abs( s*qc( 1, kwbot-kwtop ) ), abs( s*qc( 1,kwbot-kwtop+1 ) ) ) <= & + max( smlnum,ulp*temp ) ) then + ! deflatable + kwbot = kwbot-2 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_qtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) + + k2 = k2+2 + end if + k = k+2 + else + ! try to deflate real eigenvalue + temp = abs( a( kwbot, kwbot ) ) + if( temp == zero ) then + temp = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_qtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) + + k2 = k2+1 + end if + k = k+1 + end if + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + bulge = .false. + if ( k < ihi ) then + if ( a( k+1, k ) /= zero ) then + bulge = .true. + end if + end if + if ( bulge ) then + ! 2x2 eigenvalue block + call stdlib_qlag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& + alphar( k ),alphar( k+1 ), alphai( k ) ) + alphai( k+1 ) = -alphai( k ) + k = k+2 + else + ! 1x1 eigenvalue block + alphar( k ) = a( k, k ) + alphai( k ) = zero + beta( k ) = b( k, k ) + k = k+1 + end if + end do + if ( kwtop /= ilo .and. s /= zero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1,1:jw-nd ) + do k = kwbot-1, kwtop, -1 + call stdlib_qlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = zero + k2 = max( kwtop, k-1 ) + call stdlib_qrot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_qrot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_qrot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, s1 ) + + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then + ! move double pole block down and remove it + do k2 = k-1, kwbot-2 + call stdlib_qlaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& + ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) + end do + k = k-2 + else + ! k points to single shift + do k2 = k, kwbot-2 + ! move shift down + call stdlib_qlartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) + b( k2+1, k2+1 ) = temp + b( k2+1, k2 ) = zero + call stdlib_qrot( k2+2-istartm+1, a( istartm, k2+1 ), 1,a( istartm, k2 ), & + 1, c1, s1 ) + call stdlib_qrot( k2-istartm+1, b( istartm, k2+1 ), 1,b( istartm, k2 ), 1, & + c1, s1 ) + call stdlib_qrot( jw, zc( 1, k2+1-kwtop+1 ), 1, zc( 1,k2-kwtop+1 ), 1, c1, & + s1 ) + call stdlib_qlartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) + a( k2+1, k2 ) = temp + a( k2+2, k2 ) = zero + call stdlib_qrot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& + s1 ) + call stdlib_qrot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& + s1 ) + call stdlib_qrot( jw, qc( 1, k2+1-kwtop+1 ), 1, qc( 1,k2+2-kwtop+1 ), 1, & + c1, s1 ) + end do + ! remove the shift + call stdlib_qlartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) + + b( kwbot, kwbot ) = temp + b( kwbot, kwbot-1 ) = zero + call stdlib_qrot( kwbot-istartm, b( istartm, kwbot ), 1,b( istartm, kwbot-1 ),& + 1, c1, s1 ) + call stdlib_qrot( kwbot-istartm+1, a( istartm, kwbot ), 1,a( istartm, kwbot-1 & + ), 1, c1, s1 ) + call stdlib_qrot( jw, zc( 1, kwbot-kwtop+1 ), 1, zc( 1,kwbot-1-kwtop+1 ), 1, & + c1, s1 ) + k = k-1 + end if + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_qgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & + lda, zero, work, jw ) + call stdlib_qlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_qgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, zero, work, jw ) + call stdlib_qlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_qgemm( 'N', 'N', n, jw, jw, one, q( 1, kwtop ), ldq, qc,ldqc, zero, & + work, n ) + call stdlib_qlacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_qgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_qlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & + ), lda ) + call stdlib_qgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_qlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & + ), ldb ) + end if + if ( ilz ) then + call stdlib_qgemm( 'N', 'N', n, jw, jw, one, z( 1, kwtop ), ldz, zc,ldzc, zero, & + work, n ) + call stdlib_qlacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_qlaqz3 + + !> DLAQZ4: Executes a single multishift QZ sweep + + pure subroutine stdlib_qlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, sr, & + si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_qesired, ldqc, ldzc + real(qp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( & + ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * ) + integer(ilp), intent( out ) :: info + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(qp) :: temp, v(3), c1, s1, c2, s2, swap + info = 0 + if ( nblock_qesired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_qesired + return + else if ( lwork < n*nblock_qesired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLAQZ4', -info ) + return + end if + ! executable statements + if ( nshifts < 2 ) then + return + end if + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ! shuffle shifts into pairs of real shifts and pairs + ! of complex conjugate shifts assuming complex + ! conjugate shifts are already adjacent to one + ! another + do i = 1, nshifts-2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + swap = ss( i ) + ss( i ) = ss( i+1 ) + ss( i+1 ) = ss( i+2 ) + ss( i+2 ) = swap + end if + end do + ! nshfts is supposed to be even, but if it is odd, + ! then simply reduce it by one. the shuffle above + ! ensures that the dropped shift is real and that + ! the remaining shifts are paired. + ns = nshifts-mod( nshifts, 2 ) + npos = max( nblock_qesired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_qlaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) + call stdlib_qlaset( 'FULL', ns, ns, zero, one, zc, ldzc ) + do i = 1, ns, 2 + ! introduce the shift + call stdlib_qlaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & + i ), ss( i ), ss( i+1 ), v ) + temp = v( 2 ) + call stdlib_qlartg( temp, v( 3 ), c1, s1, v( 2 ) ) + call stdlib_qlartg( v( 1 ), v( 2 ), c2, s2, temp ) + call stdlib_qrot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) + call stdlib_qrot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) + call stdlib_qrot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) + call stdlib_qrot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) + call stdlib_qrot( ns+1, qc( 1, 2 ), 1, qc( 1, 3 ), 1, c1, s1 ) + call stdlib_qrot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c2, s2 ) + ! chase the shift down + do j = 1, ns-1-i + call stdlib_qlaqz2( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & + ), lda, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & + ), ldb, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_qgemm( 'N', 'N', n, sheight, sheight, one, q( 1, ilo ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_qlacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & + zc, ldzc, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & + zc, ldzc, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_qgemm( 'N', 'N', n, swidth, swidth, one, z( 1, ilo ), ldz,zc, ldzc, & + zero, work, n ) + call stdlib_qlacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_qlaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) + call stdlib_qlaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -2 + do j = 0, np-1 + ! move down the block with index k+i+j-1, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_qlaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& + ns+np ), lda, zero, work,sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& + ns+np ), ldb, zero, work,sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_qgemm( 'N', 'N', n, nblock, nblock, one, q( 1, k+1 ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_qlacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & + zc, ldzc, zero, work,sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & + zc, ldzc, zero, work,sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_qgemm( 'N', 'N', n, nblock, nblock, one, z( 1, k ),ldz, zc, ldzc, & + zero, work, n ) + call stdlib_qlacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_qlaset( 'FULL', ns, ns, zero, one, qc, ldqc ) + call stdlib_qlaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns, 2 + ! chase the shift down to the bottom right corner + do ishift = ihi-i-1, ihi-2 + call stdlib_qlaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_qgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_qgemm( 'N', 'N', n, ns, ns, one, q( 1, ihi-ns+1 ), ldq,qc, ldqc, zero, & + work, n ) + call stdlib_qlacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& + zc, ldzc, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_qgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& + zc, ldzc, zero, work, sheight ) + call stdlib_qlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_qgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1, ihi-ns ), ldz,zc, ldzc, zero,& + work, n ) + call stdlib_qlacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_qlaqz4 + + !> DLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_qlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(qp), intent(in) :: gaptol, lambda, pivmin + real(qp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(qp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(qp), intent(out) :: work(*) + real(qp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(qp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + eps = stdlib_qlamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus DLAR2V: applies a vector of real plane rotations from both sides to + !> a sequence of 2-by-2 real symmetric matrices, defined by the elements + !> of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) + !> ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_qlar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(qp), intent(in) :: c(*), s(*) + real(qp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(qp) :: ci, si, t1, t2, t3, t4, t5, t6, xi, yi, zi + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( ix ) + zi = z( ix ) + ci = c( ic ) + si = s( ic ) + t1 = si*zi + t2 = ci*zi + t3 = t2 - si*xi + t4 = t2 + si*yi + t5 = ci*xi + t1 + t6 = ci*yi - t1 + x( ix ) = ci*t5 + si*t4 + y( ix ) = ci*t6 - si*t3 + z( ix ) = ci*t4 - si*t5 + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_qlar2v + + !> DLARF: applies a real elementary reflector H to a real m by n matrix + !> C, from either the left or the right. H is represented in the form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + + pure subroutine stdlib_qlarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + real(qp), intent(in) :: tau + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(in) :: v(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=zero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-zero row in v. + do while( lastv>0 .and. v( i )==zero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-zero column in c(1:lastv,:). + lastc = stdlib_ilaqlc(lastv, n, c, ldc) + else + ! scan for the last non-zero row in c(:,1:lastv). + lastc = stdlib_ilaqlr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_qp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**t * v(1:lastv,1) + call stdlib_qgemv( 'TRANSPOSE', lastv, lastc, one, c, ldc, v, incv,zero, work, 1 & + ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**t + call stdlib_qger( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_qgemv( 'NO TRANSPOSE', lastc, lastv, one, c, ldc,v, incv, zero, work,& + 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**t + call stdlib_qger( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_qlarf + + !> DLARFB: applies a real block reflector H or its transpose H**T to a + !> real m by n matrix C, from either the left or the right. + + pure subroutine stdlib_qlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(in) :: t(ldt,*), v(ldv,*) + real(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_qcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2 + call stdlib_qgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c( k+1, 1 ),& + ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_qtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c2 := c2 - v2 * w**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v( k+1, 1 )& + , ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_qcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c2 := c2 - w * v2**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( k+1, 1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_qcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( m-k+& + 1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1 + call stdlib_qgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_qtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c1 := c1 - v1 * w**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v, ldv, & + work, ldwork, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v( m-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_qcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( n-k+& + 1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c, ldc, & + v, ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c1 := c1 - w * v1**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v( n-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_qcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2**t + call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c( k+1, 1 ), & + ldc, v( 1, k+1 ), ldv, one,work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_qtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c2 := c2 - v2**t * w**t + call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v( 1, k+1 ), & + ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_qcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c( 1, k+1 ),& + ldc, v( 1, k+1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_qtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( 1, k+1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_qtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_qcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v( 1, m-k+& + 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1**t + call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c, ldc, v, ldv,& + one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_qtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c1 := c1 - v1**t * w**t + call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v, ldv, work, & + ldwork, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h' where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_qcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v( 1, n-k+& + 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1**t + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_qtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_qlarfb + + !> DLARFB_GETT: applies a real Householder block reflector H from the + !> left to a real (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_qlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(in) :: t(ldt,*) + real(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_qcopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_qtrmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_qgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1, k+1 ), ldb, one, work, & + ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_qtrmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_qgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1, k+& + 1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_qtrmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_qcopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = zero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_qtrmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_qtrmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_qtrmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_qtrmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_qlarfb_gett + + !> DLARFG: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, and x is an (n-1)-element real + !> vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + !> Otherwise 1 <= tau <= 2. + + pure subroutine stdlib_qlarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(qp), intent(inout) :: alpha + real(qp), intent(out) :: tau + ! Array Arguments + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(qp) :: beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=1 ) then + tau = zero + return + end if + xnorm = stdlib_qnrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_qlapy2( alpha, xnorm ), alpha ) + safmin = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'E' ) + knt = 0 + if( abs( beta ) DLARFGP: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is non-negative, and x is + !> an (n-1)-element real vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + + subroutine stdlib_qlarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(qp), intent(inout) :: alpha + real(qp), intent(out) :: tau + ! Array Arguments + real(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(qp) :: beta, bignum, savealpha, smlnum, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_qnrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = [+/-1, 0; i], sign chosen so alpha >= 0 + if( alpha>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + alpha = -alpha + end if + else + ! general case + beta = sign( stdlib_qlapy2( alpha, xnorm ), alpha ) + smlnum = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'E' ) + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + beta = -savealpha + end if + else + ! this is the general case. + call stdlib_qscal( n-1, one / alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_qlarfgp + + !> DLARFT: forms the triangular factor T of a real block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + + pure subroutine stdlib_qlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(qp), intent(out) :: t(ldt,*) + real(qp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( i, prevlastv ) + if( tau( i )==zero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = zero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( i , j ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**t * v(i:j,i) + call stdlib_qgemv( 'TRANSPOSE', j-i, i-1, -tau( i ),v( i+1, 1 ), ldv, v( i+& + 1, i ), 1, one,t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**t + call stdlib_qgemv( 'NO TRANSPOSE', i-1, j-i, -tau( i ),v( 1, i+1 ), ldv, v(& + i, i+1 ), ldv, one,t( 1, i ), 1 ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_qtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_qlarft + + !> DLARFX: applies a real elementary reflector H to a real m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_qlarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + real(qp), intent(in) :: tau + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(in) :: v(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + real(qp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, v6, & + v7, v8, v9 + ! Executable Statements + if( tau==zero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_qlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_qlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 continue + return + end subroutine stdlib_qlarfx + + !> DLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n symmetric matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_qlarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + real(qp), intent(in) :: tau + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(in) :: v(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: alpha + ! Executable Statements + if( tau==zero )return + ! form w:= c * v + call stdlib_qsymv( uplo, n, one, c, ldc, v, incv, zero, work, 1 ) + alpha = -half*tau*stdlib_qdot( n, work, 1, v, incv ) + call stdlib_qaxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_qsyr2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_qlarfy + + !> DLARGV: generates a vector of real plane rotations, determined by + !> elements of the real vectors x and y. For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( a(i) ) + !> ( -s(i) c(i) ) ( y(i) ) = ( 0 ) + + pure subroutine stdlib_qlargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(qp), intent(out) :: c(*) + real(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(qp) :: f, g, t, tt + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + loop_10: do i = 1, n + f = x( ix ) + g = y( iy ) + if( g==zero ) then + c( ic ) = one + else if( f==zero ) then + c( ic ) = zero + y( iy ) = one + x( ix ) = g + else if( abs( f )>abs( g ) ) then + t = g / f + tt = sqrt( one+t*t ) + c( ic ) = one / tt + y( iy ) = t*c( ic ) + x( ix ) = f*tt + else + t = f / g + tt = sqrt( one+t*t ) + y( iy ) = one / tt + c( ic ) = t*y( iy ) + x( ix ) = g*tt + end if + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_10 + return + end subroutine stdlib_qlargv + + !> DLARNV: returns a vector of n random real numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_qlarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(qp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(qp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_qp + + + + ! Local Scalars + integer(ilp) :: i, il, il2, iv + ! Local Arrays + real(qp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cos,log,min,sqrt + ! Executable Statements + do 40 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + if( idist==3 ) then + il2 = 2*il + else + il2 = il + end if + ! call stdlib_qlaruv to generate il2 numbers from a uniform (0,1) + ! distribution (il2 <= lv) + call stdlib_qlaruv( iseed, il2, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = u( i ) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = two*u( i ) - one + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*cos( twopi*u( 2*i ) ) + end do + end if + 40 continue + return + end subroutine stdlib_qlarnv + + !> Compute the splitting points with threshold SPLTOL. + !> DLARRA: sets any "small" off-diagonal elements to zero. + + pure subroutine stdlib_qlarra( n, d, e, e2, spltol, tnrm,nsplit, isplit, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, nsplit + integer(ilp), intent(in) :: n + real(qp), intent(in) :: spltol, tnrm + ! Array Arguments + integer(ilp), intent(out) :: isplit(*) + real(qp), intent(in) :: d(*) + real(qp), intent(inout) :: e(*), e2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: eabs, tmp1 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! compute splitting points + nsplit = 1 + if(spltol Given the relatively robust representation(RRR) L D L^T, DLARRB: + !> does "limited" bisection to refine the eigenvalues of L D L^T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses and their gaps are input in WERR + !> and WGAP, respectively. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_qlarrb( n, d, lld, ifirst, ilast, rtol1,rtol2, offset, w, wgap, werr, & + work, iwork,pivmin, spdiam, twist, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset, twist + integer(ilp), intent(out) :: info + real(qp), intent(in) :: pivmin, rtol1, rtol2, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: d(*), lld(*) + real(qp), intent(inout) :: w(*), werr(*), wgap(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: i, i1, ii, ip, iter, k, negcnt, next, nint, olnint, prev, r + real(qp) :: back, cvrgd, gap, left, lgap, mid, mnwdth, rgap, right, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + mnwdth = two * pivmin + r = twist + if((r<1).or.(r>n)) r = n + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + rgap = wgap( i1-offset ) + loop_75: do i = i1, ilast + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + right = w( ii ) + werr( ii ) + lgap = rgap + rgap = wgap( ii ) + gap = min( lgap, rgap ) + ! make sure that [left,right] contains the desired eigenvalue + ! compute negcount from dstqds facto l+d+l+^t = l d l^t - left + ! do while( negcnt(left)>i-1 ) + back = werr( ii ) + 20 continue + negcnt = stdlib_qlaneg( n, d, lld, left, pivmin, r ) + if( negcnt>i-1 ) then + left = left - back + back = two*back + go to 20 + end if + ! do while( negcnt(right)=i1).and.(i<=ilast)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + nint = nint + 1 + iwork( k-1 ) = i + 1 + iwork( k ) = negcnt + end if + work( k-1 ) = left + work( k ) = right + end do loop_75 + ! do while( nint>0 ), i.e. there are still unconverged intervals + ! and while (iter1) lgap = wgap( ii-1 ) + gap = min( lgap, rgap ) + next = iwork( k-1 ) + left = work( k-1 ) + right = work( k ) + mid = half*( left + right ) + ! semiwidth of interval + width = right - mid + tmp = max( abs( left ), abs( right ) ) + cvrgd = max(rtol1*gap,rtol2*tmp) + if( ( width<=cvrgd ) .or. ( width<=mnwdth ).or.( iter==maxitr ) )then + ! reduce number of unconverged intervals + nint = nint - 1 + ! mark interval as converged. + iwork( k-1 ) = 0 + if( i1==i ) then + i1 = next + else + ! prev holds the last unconverged interval previously examined + if(prev>=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + negcnt = stdlib_qlaneg( n, d, lld, mid, pivmin, r ) + if( negcnt<=i-1 ) then + work( k-1 ) = mid + else + work( k ) = mid + end if + i = next + end do loop_100 + iter = iter + 1 + ! do another loop if there are still unconverged intervals + ! however, in the last iteration, all intervals are accepted + ! since this is the best we can do. + if( ( nint>0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = ifirst, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + do i = ifirst+1, ilast + k = 2*i + ii = i - offset + wgap( ii-1 ) = max( zero,w(ii) - werr (ii) - w( ii-1 ) - werr( ii-1 )) + end do + return + end subroutine stdlib_qlarrb + + !> Find the number of eigenvalues of the symmetric tridiagonal matrix T + !> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T + !> if JOBT = 'L'. + + pure subroutine stdlib_qlarrc( jobt, n, vl, vu, d, e, pivmin,eigcnt, lcnt, rcnt, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobt + integer(ilp), intent(out) :: eigcnt, info, lcnt, rcnt + integer(ilp), intent(in) :: n + real(qp), intent(in) :: pivmin, vl, vu + ! Array Arguments + real(qp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: matt + real(qp) :: lpivot, rpivot, sl, su, tmp, tmp2 + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + lcnt = 0 + rcnt = 0 + eigcnt = 0 + matt = stdlib_lsame( jobt, 'T' ) + if (matt) then + ! sturm sequence count on t + lpivot = d( 1 ) - vl + rpivot = d( 1 ) - vu + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + do i = 1, n-1 + tmp = e(i)**2 + lpivot = ( d( i+1 )-vl ) - tmp/lpivot + rpivot = ( d( i+1 )-vu ) - tmp/rpivot + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + end do + else + ! sturm sequence count on l d l^t + sl = -vl + su = -vu + do i = 1, n - 1 + lpivot = d( i ) + sl + rpivot = d( i ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + tmp = e(i) * d(i) * e(i) + tmp2 = tmp / lpivot + if( tmp2==zero ) then + sl = tmp - vl + else + sl = sl*tmp2 - vl + end if + tmp2 = tmp / rpivot + if( tmp2==zero ) then + su = tmp - vu + else + su = su*tmp2 - vu + end if + end do + lpivot = d( n ) + sl + rpivot = d( n ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + endif + eigcnt = rcnt - lcnt + return + end subroutine stdlib_qlarrc + + !> DLARRD: computes the eigenvalues of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_qlarrd( range, order, n, vl, vu, il, iu, gers,reltol, d, e, e2, & + pivmin, nsplit, isplit,m, w, werr, wl, wu, iblock, indexw,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: order, range + integer(ilp), intent(in) :: il, iu, n, nsplit + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: pivmin, reltol, vl, vu + real(qp), intent(out) :: wl, wu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), indexw(*), iwork(*) + integer(ilp), intent(in) :: isplit(*) + real(qp), intent(in) :: d(*), e(*), e2(*), gers(*) + real(qp), intent(out) :: w(*), werr(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: fudge = two + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: valrng = 2 + integer(ilp), parameter :: indrng = 3 + + + ! Local Scalars + logical(lk) :: ncnvrg, toofew + integer(ilp) :: i, ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iout, & + irange, itmax, itmp1, itmp2, iw, iwoff, j, jblk, jdisc, je, jee, nb, nwl, nwu + real(qp) :: atoli, eps, gl, gu, rtoli, tmp1, tmp2, tnorm, uflow, wkill, wlu, & + wul + ! Local Arrays + integer(ilp) :: idumma(1) + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + else + irange = 0 + end if + ! check for errors + if( irange<=0 ) then + info = -1 + else if( .not.(stdlib_lsame(order,'B').or.stdlib_lsame(order,'E')) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( irange==valrng ) then + if( vl>=vu )info = -5 + else if( irange==indrng .and.( il<1 .or. il>max( 1, n ) ) ) then + info = -6 + else if( irange==indrng .and.( iun ) ) then + info = -7 + end if + if( info/=0 ) then + return + end if + ! initialize error flags + info = 0 + ncnvrg = .false. + toofew = .false. + ! quick return if possible + m = 0 + if( n==0 ) return + ! simplification: + if( irange==indrng .and. il==1 .and. iu==n ) irange = 1 + ! get machine constants + eps = stdlib_qlamch( 'P' ) + uflow = stdlib_qlamch( 'U' ) + ! special case when n=1 + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + endif + return + end if + ! nb is the minimum vector length for vector bisection, or 0 + ! if only scalar is to be done. + nb = stdlib_ilaenv( 1, 'DSTEBZ', ' ', n, -1, -1, -1 ) + if( nb<=1 ) nb = 0 + ! find global spectral radius + gl = d(1) + gu = d(1) + do i = 1,n + gl = min( gl, gers( 2*i - 1)) + gu = max( gu, gers(2*i) ) + end do + ! compute global gerschgorin bounds and spectral diameter + tnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*tnorm*eps*n - fudge*two*pivmin + gu = gu + fudge*tnorm*eps*n + fudge*two*pivmin + ! [jan/28/2009] remove the line below since spdiam variable not use + ! spdiam = gu - gl + ! input arguments for stdlib_qlaebz: + ! the relative tolerance. an interval (a,b] lies within + ! "relative tolerance" if b-a < reltol*max(|a|,|b|), + rtoli = reltol + ! set the absolute tolerance for interval convergence to zero to force + ! interval convergence based on relative size of the interval. + ! this is dangerous because intervals might not converge when reltol is + ! small. but at least a very small number should be selected so that for + ! strongly graded matrices, the code can get relatively accurate + ! eigenvalues. + atoli = fudge*two*uflow + fudge*two*pivmin + if( irange==indrng ) then + ! range='i': compute an interval containing eigenvalues + ! il through iu. the initial interval [gl,gu] from the global + ! gerschgorin bounds gl and gu is refined by stdlib_qlaebz. + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + work( n+1 ) = gl + work( n+2 ) = gl + work( n+3 ) = gu + work( n+4 ) = gu + work( n+5 ) = gl + work( n+6 ) = gu + iwork( 1 ) = -1 + iwork( 2 ) = -1 + iwork( 3 ) = n + 1 + iwork( 4 ) = n + 1 + iwork( 5 ) = il - 1 + iwork( 6 ) = iu + call stdlib_qlaebz( 3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin,d, e, e2, iwork( 5 )& + , work( n+1 ), work( n+5 ), iout,iwork, w, iblock, iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! on exit, output intervals may not be ordered by ascending negcount + if( iwork( 6 )==iu ) then + wl = work( n+1 ) + wlu = work( n+3 ) + nwl = iwork( 1 ) + wu = work( n+4 ) + wul = work( n+2 ) + nwu = iwork( 4 ) + else + wl = work( n+2 ) + wlu = work( n+4 ) + nwl = iwork( 2 ) + wu = work( n+3 ) + wul = work( n+1 ) + nwu = iwork( 3 ) + end if + ! on exit, the interval [wl, wlu] contains a value with negcount nwl, + ! and [wul, wu] contains a value with negcount nwu. + if( nwl<0 .or. nwl>=n .or. nwu<1 .or. nwu>n ) then + info = 4 + return + end if + elseif( irange==valrng ) then + wl = vl + wu = vu + elseif( irange==allrng ) then + wl = gl + wu = gu + endif + ! find eigenvalues -- loop over blocks and recompute nwl and nwu. + ! nwl accumulates the number of eigenvalues .le. wl, + ! nwu accumulates the number of eigenvalues .le. wu + m = 0 + iend = 0 + info = 0 + nwl = 0 + nwu = 0 + loop_70: do jblk = 1, nsplit + ioff = iend + ibegin = ioff + 1 + iend = isplit( jblk ) + in = iend - ioff + if( in==1 ) then + ! 1x1 block + if( wl>=d( ibegin )-pivmin )nwl = nwl + 1 + if( wu>=d( ibegin )-pivmin )nwu = nwu + 1 + if( irange==allrng .or.( wl= d( ibegin )-pivmin ) ) & + then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + iblock( m ) = jblk + indexw( m ) = 1 + end if + ! disabled 2x2 case because of a failure on the following matrix + ! range = 'i', il = iu = 4 + ! original tridiagonal, d = [ + ! -0.150102010615740e+00_qp + ! -0.849897989384260e+00_qp + ! -0.128208148052635e-15_qp + ! 0.128257718286320e-15_qp + ! ]; + ! e = [ + ! -0.357171383266986e+00_qp + ! -0.180411241501588e-15_qp + ! -0.175152352710251e-15_qp + ! ]; + ! else if( in==2 ) then + ! * 2x2 block + ! disc = sqrt( (half*(d(ibegin)-d(iend)))**2 + e(ibegin)**2 ) + ! tmp1 = half*(d(ibegin)+d(iend)) + ! l1 = tmp1 - disc + ! if( wl>= l1-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l1-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l1-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l1 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 1 + ! endif + ! l2 = tmp1 + disc + ! if( wl>= l2-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l2-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l2-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l2 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 2 + ! endif + else + ! general case - block of size in >= 2 + ! compute local gerschgorin interval and use it as the initial + ! interval for stdlib_qlaebz + gu = d( ibegin ) + gl = d( ibegin ) + tmp1 = zero + do j = ibegin, iend + gl = min( gl, gers( 2*j - 1)) + gu = max( gu, gers(2*j) ) + end do + ! [jan/28/2009] + ! change spdiam by tnorm in lines 2 and 3 thereafter + ! line 1: remove computation of spdiam (not useful anymore) + ! spdiam = gu - gl + ! gl = gl - fudge*spdiam*eps*in - fudge*pivmin + ! gu = gu + fudge*spdiam*eps*in + fudge*pivmin + gl = gl - fudge*tnorm*eps*in - fudge*pivmin + gu = gu + fudge*tnorm*eps*in + fudge*pivmin + if( irange>1 ) then + if( gu=gu )cycle loop_70 + end if + ! find negcount of initial interval boundaries gl and gu + work( n+1 ) = gl + work( n+in+1 ) = gu + call stdlib_qlaebz( 1, 0, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e( & + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), im,iwork, w( m+1 ),& + iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + nwl = nwl + iwork( 1 ) + nwu = nwu + iwork( in+1 ) + iwoff = m - iwork( 1 ) + ! compute eigenvalues + itmax = int( ( log( gu-gl+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + & + 2 + call stdlib_qlaebz( 2, itmax, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e(& + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), iout,iwork, w( m+& + 1 ), iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! copy eigenvalues into w and iblock + ! use -jblk for block number for unconverged eigenvalues. + ! loop over the number of output intervals from stdlib_qlaebz + do j = 1, iout + ! eigenvalue approximation is middle point of interval + tmp1 = half*( work( j+n )+work( j+in+n ) ) + ! semi length of error interval + tmp2 = half*abs( work( j+n )-work( j+in+n ) ) + if( j>iout-iinfo ) then + ! flag non-convergence. + ncnvrg = .true. + ib = -jblk + else + ib = jblk + end if + do je = iwork( j ) + 1 + iwoff,iwork( j+in ) + iwoff + w( je ) = tmp1 + werr( je ) = tmp2 + indexw( je ) = je - iwoff + iblock( je ) = ib + end do + end do + m = m + im + end if + end do loop_70 + ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu + ! if nwl+1 < il or nwu > iu, discard extra eigenvalues. + if( irange==indrng ) then + idiscl = il - 1 - nwl + idiscu = nwu - iu + if( idiscl>0 ) then + im = 0 + do je = 1, m + ! remove some of the smallest eigenvalues from the left so that + ! at the end idiscl =0. move all eigenvalues up to the left. + if( w( je )<=wlu .and. idiscl>0 ) then + idiscl = idiscl - 1 + else + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscu>0 ) then + ! remove some of the largest eigenvalues from the right so that + ! at the end idiscu =0. move all eigenvalues up to the left. + im=m+1 + do je = m, 1, -1 + if( w( je )>=wul .and. idiscu>0 ) then + idiscu = idiscu - 1 + else + im = im - 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + jee = 0 + do je = im, m + jee = jee + 1 + w( jee ) = w( je ) + werr( jee ) = werr( je ) + indexw( jee ) = indexw( je ) + iblock( jee ) = iblock( je ) + end do + m = m-im+1 + end if + if( idiscl>0 .or. idiscu>0 ) then + ! code to deal with effects of bad arithmetic. (if n(w) is + ! monotone non-decreasing, this should never happen.) + ! some low eigenvalues to be discarded are not in (wl,wlu], + ! or high eigenvalues to be discarded are not in (wul,wu] + ! so just kill off the smallest idiscl/largest idiscu + ! eigenvalues, by marking the corresponding iblock = 0 + if( idiscl>0 ) then + wkill = wu + do jdisc = 1, idiscl + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )0 ) then + wkill = wl + do jdisc = 1, idiscu + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )>=wkill .or. iw==0 ) ) then + iw = je + wkill = w( je ) + end if + end do + iblock( iw ) = 0 + end do + end if + ! now erase all eigenvalues with iblock set to zero + im = 0 + do je = 1, m + if( iblock( je )/=0 ) then + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl<0 .or. idiscu<0 ) then + toofew = .true. + end if + end if + if(( irange==allrng .and. m/=n ).or.( irange==indrng .and. m/=iu-il+1 ) ) then + toofew = .true. + end if + ! if order='b', do nothing the eigenvalues are already sorted by + ! block. + ! if order='e', sort the eigenvalues from smallest to largest + if( stdlib_lsame(order,'E') .and. nsplit>1 ) then + do je = 1, m - 1 + ie = 0 + tmp1 = w( je ) + do j = je + 1, m + if( w( j ) To find the desired eigenvalues of a given real symmetric + !> tridiagonal matrix T, DLARRE: sets any "small" off-diagonal + !> elements to zero, and for each unreduced block T_i, it finds + !> (a) a suitable shift at one end of the block's spectrum, + !> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and + !> (c) eigenvalues of each L_i D_i L_i^T. + !> The representations and eigenvalues found are then used by + !> DSTEMR to compute the eigenvectors of T. + !> The accuracy varies depending on whether bisection is used to + !> find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to + !> conpute all and then discard any unwanted one. + !> As an added benefit, DLARRE also outputs the n + !> Gerschgorin intervals for the matrices L_i D_i L_i^T. + + pure subroutine stdlib_qlarre( range, n, vl, vu, il, iu, d, e, e2,rtol1, rtol2, spltol, & + nsplit, isplit, m,w, werr, wgap, iblock, indexw, gers, pivmin,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: range + integer(ilp), intent(in) :: il, iu, n + integer(ilp), intent(out) :: info, m, nsplit + real(qp), intent(out) :: pivmin + real(qp), intent(in) :: rtol1, rtol2, spltol + real(qp), intent(inout) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), isplit(*), iwork(*), indexw(*) + real(qp), intent(inout) :: d(*), e(*), e2(*) + real(qp), intent(out) :: gers(*), w(*), werr(*), wgap(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: hndrd = 100.0_qp + real(qp), parameter :: pert = 8.0_qp + real(qp), parameter :: fourth = one/four + real(qp), parameter :: fac = half + real(qp), parameter :: maxgrowth = 64.0_qp + real(qp), parameter :: fudge = 2.0_qp + integer(ilp), parameter :: maxtry = 6 + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: indrng = 2 + integer(ilp), parameter :: valrng = 3 + + + ! Local Scalars + logical(lk) :: forceb, norep, usedqd + integer(ilp) :: cnt, cnt1, cnt2, i, ibegin, idum, iend, iinfo, in, indl, indu, irange, & + j, jblk, mb, mm, wbegin, wend + real(qp) :: avgap, bsrtol, clwdth, dmax, dpivot, eabs, emax, eold, eps, gl, gu, isleft,& + isrght, rtl, rtol, s1, s2, safmin, sgndef, sigma, spdiam, tau, tmp, tmp1 + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + end if + m = 0 + ! get machine constants + safmin = stdlib_qlamch( 'S' ) + eps = stdlib_qlamch( 'P' ) + ! set parameters + rtl = sqrt(eps) + bsrtol = sqrt(eps) + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + wgap(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + gers(1) = d( 1 ) + gers(2) = d( 1 ) + endif + ! store the shift for the initial rrr, which is zero in this case + e(1) = zero + return + end if + ! general case: tridiagonal matrix of order > 1 + ! init werr, wgap. compute gerschgorin intervals and spectral diameter. + ! compute maximum off-diagonal entry and pivmin. + gl = d(1) + gu = d(1) + eold = zero + emax = zero + e(n) = zero + do i = 1,n + werr(i) = zero + wgap(i) = zero + eabs = abs( e(i) ) + if( eabs >= emax ) then + emax = eabs + end if + tmp1 = eabs + eold + gers( 2*i-1) = d(i) - tmp1 + gl = min( gl, gers( 2*i - 1)) + gers( 2*i ) = d(i) + tmp1 + gu = max( gu, gers(2*i) ) + eold = eabs + end do + ! the minimum pivot allowed in the sturm sequence for t + pivmin = safmin * max( one, emax**2 ) + ! compute spectral diameter. the gerschgorin bounds give an + ! estimate that is wrong by at most a factor of sqrt(2) + spdiam = gu - gl + ! compute splitting points + call stdlib_qlarra( n, d, e, e2, spltol, spdiam,nsplit, isplit, iinfo ) + ! can force use of bisection instead of faster dqds. + ! option left in the code for future multisection work. + forceb = .false. + ! initialize usedqd, dqds should be used for allrng unless someone + ! explicitly wants bisection. + usedqd = (( irange==allrng ) .and. (.not.forceb)) + if( (irange==allrng) .and. (.not. forceb) ) then + ! set interval [vl,vu] that contains all eigenvalues + vl = gl + vu = gu + else + ! we call stdlib_qlarrd to find crude approximations to the eigenvalues + ! in the desired range. in case irange = indrng, we also obtain the + ! interval (vl,vu] that contains all the wanted eigenvalues. + ! an interval [left,right] has converged if + ! right-leftvl ).and.( d( & + ibegin )<=vu ) ).or. ( (irange==indrng).and.(iblock(wbegin)==jblk))) then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + wgap(m) = zero + iblock( m ) = jblk + indexw( m ) = 1 + wbegin = wbegin + 1 + endif + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + end if + ! blocks of size larger than 1x1 + ! e( iend ) will hold the shift for the initial rrr, for now set it =0 + e( iend ) = zero + ! find local outer bounds gl,gu for the block + gl = d(ibegin) + gu = d(ibegin) + do i = ibegin , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + if(.not. ((irange==allrng).and.(.not.forceb)) ) then + ! count the number of eigenvalues in the current block. + mb = 0 + do i = wbegin,mm + if( iblock(i)==jblk ) then + mb = mb+1 + else + goto 21 + endif + end do + 21 continue + if( mb==0) then + ! no eigenvalue in the current block lies in the desired range + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + else + ! decide whether dqds or bisection is more efficient + usedqd = ( (mb > fac*in) .and. (.not.forceb) ) + wend = wbegin + mb - 1 + ! calculate gaps for the current block + ! in later stages, when representations for individual + ! eigenvalues are different, we use sigma = e( iend ). + sigma = zero + do i = wbegin, wend - 1 + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( wend ) = max( zero,vu - sigma - (w( wend )+werr( wend ))) + ! find local index of the first and last desired evalue. + indl = indexw(wbegin) + indu = indexw( wend ) + endif + endif + if(( (irange==allrng) .and. (.not. forceb) ).or.usedqd) then + ! case of dqds + ! find approximations to the extremal eigenvalues of the block + call stdlib_qlarrk( in, 1, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1, & + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isleft = max(gl, tmp - tmp1- hndrd * eps* abs(tmp - tmp1)) + call stdlib_qlarrk( in, in, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1,& + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isrght = min(gu, tmp + tmp1+ hndrd * eps * abs(tmp + tmp1)) + ! improve the estimate of the spectral diameter + spdiam = isrght - isleft + else + ! case of bisection + ! find approximations to the wanted extremal eigenvalues + isleft = max(gl, w(wbegin) - werr(wbegin)- hndrd * eps*abs(w(wbegin)- werr(& + wbegin) )) + isrght = min(gu,w(wend) + werr(wend)+ hndrd * eps * abs(w(wend)+ werr(wend))) + + endif + ! decide whether the base representation for the current block + ! l_jblk d_jblk l_jblk^t = t_jblk - sigma_jblk i + ! should be on the left or the right end of the current block. + ! the strategy is to shift to the end which is "more populated" + ! furthermore, decide whether to use dqds for the computation of + ! dqds is chosen if all eigenvalues are desired or the number of + ! eigenvalues to be computed is large compared to the blocksize. + if( ( irange==allrng ) .and. (.not.forceb) ) then + ! if all the eigenvalues have to be computed, we use dqd + usedqd = .true. + ! indl is the local index of the first eigenvalue to compute + indl = 1 + indu = in + ! mb = number of eigenvalues to compute + mb = in + wend = wbegin + mb - 1 + ! define 1/4 and 3/4 points of the spectrum + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + ! stdlib_qlarrd has computed iblock and indexw for each eigenvalue + ! approximation. + ! choose sigma + if( usedqd ) then + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + tmp = min(isrght,vu) - max(isleft,vl) + s1 = max(isleft,vl) + fourth * tmp + s2 = min(isrght,vu) - fourth * tmp + endif + endif + ! compute the negcount at the 1/4 and 3/4 points + if(mb>1) then + call stdlib_qlarrc( 'T', in, s1, s2, d(ibegin),e(ibegin), pivmin, cnt, cnt1, & + cnt2, iinfo) + endif + if(mb==1) then + sigma = gl + sgndef = one + elseif( cnt1 - indl >= indu - cnt2 ) then + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = max(isleft,gl) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get pos def matrix + ! for dqds + sigma = isleft + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = max(isleft,vl) + endif + sgndef = one + else + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = min(isrght,gu) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get neg def matrix + ! for dqds + sigma = isrght + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = min(isrght,vu) + endif + sgndef = -one + endif + ! an initial sigma has been chosen that will be used for computing + ! t - sigma i = l d l^t + ! define the increment tau of the shift in case the initial shift + ! needs to be refined to obtain a factorization with not too much + ! element growth. + if( usedqd ) then + ! the initial sigma was to the outer end of the spectrum + ! the matrix is definite and we need not retreat. + tau = spdiam*eps*n + two*pivmin + tau = max( tau,two*eps*abs(sigma) ) + else + if(mb>1) then + clwdth = w(wend) + werr(wend) - w(wbegin) - werr(wbegin) + avgap = abs(clwdth / real(wend-wbegin,KIND=qp)) + if( sgndef==one ) then + tau = half*max(wgap(wbegin),avgap) + tau = max(tau,werr(wbegin)) + else + tau = half*max(wgap(wend-1),avgap) + tau = max(tau,werr(wend)) + endif + else + tau = werr(wbegin) + endif + endif + loop_80: do idum = 1, maxtry + ! compute l d l^t factorization of tridiagonal matrix t - sigma i. + ! store d in work(1:in), l in work(in+1:2*in), and reciprocals of + ! pivots in work(2*in+1:3*in) + dpivot = d( ibegin ) - sigma + work( 1 ) = dpivot + dmax = abs( work(1) ) + j = ibegin + do i = 1, in - 1 + work( 2*in+i ) = one / work( i ) + tmp = e( j )*work( 2*in+i ) + work( in+i ) = tmp + dpivot = ( d( j+1 )-sigma ) - tmp*e( j ) + work( i+1 ) = dpivot + dmax = max( dmax, abs(dpivot) ) + j = j + 1 + end do + ! check for element growth + if( dmax > maxgrowth*spdiam ) then + norep = .true. + else + norep = .false. + endif + if( usedqd .and. .not.norep ) then + ! ensure the definiteness of the representation + ! all entries of d (of l d l^t) must have the same sign + do i = 1, in + tmp = sgndef*work( i ) + if( tmp1 ) then + ! perturb each entry of the base representation by a small + ! (but random) relative amount to overcome difficulties with + ! glued matrices. + do i = 1, 4 + iseed( i ) = 1 + end do + call stdlib_qlarnv(2, iseed, 2*in-1, work(1)) + do i = 1,in-1 + d(ibegin+i-1) = d(ibegin+i-1)*(one+eps*pert*work(i)) + e(ibegin+i-1) = e(ibegin+i-1)*(one+eps*pert*work(in+i)) + end do + d(iend) = d(iend)*(one+eps*four*work(in)) + endif + ! don't update the gerschgorin intervals because keeping track + ! of the updates would be too much work in stdlib_qlarrv. + ! we update w instead and use it to locate the proper gerschgorin + ! intervals. + ! compute the required eigenvalues of l d l' by bisection or dqds + if ( .not.usedqd ) then + ! if stdlib_qlarrd has been used, shift the eigenvalue approximations + ! according to their representation. this is necessary for + ! a uniform stdlib_qlarrv since dqds computes eigenvalues of the + ! shifted representation. in stdlib_qlarrv, w will always hold the + ! unshifted eigenvalue approximation. + do j=wbegin,wend + w(j) = w(j) - sigma + werr(j) = werr(j) + abs(w(j)) * eps + end do + ! call stdlib_qlarrb to reduce eigenvalue error of the approximations + ! from stdlib_qlarrd + do i = ibegin, iend-1 + work( i ) = d( i ) * e( i )**2 + end do + ! use bisection to find ev from indl to indu + call stdlib_qlarrb(in, d(ibegin), work(ibegin),indl, indu, rtol1, rtol2, indl-1,& + w(wbegin), wgap(wbegin), werr(wbegin),work( 2*n+1 ), iwork, pivmin, spdiam,in, & + iinfo ) + if( iinfo /= 0 ) then + info = -4 + return + end if + ! stdlib_qlarrb computes all gaps correctly except for the last one + ! record distance to vu/gu + wgap( wend ) = max( zero,( vu-sigma ) - ( w( wend ) + werr( wend ) ) ) + do i = indl, indu + m = m + 1 + iblock(m) = jblk + indexw(m) = i + end do + else + ! call dqds to get all eigs (and then possibly delete unwanted + ! eigenvalues). + ! note that dqds finds the eigenvalues of the l d l^t representation + ! of t to high relative accuracy. high relative accuracy + ! might be lost when the shift of the rrr is subtracted to obtain + ! the eigenvalues of t. however, t is not guaranteed to define its + ! eigenvalues to high relative accuracy anyway. + ! set rtol to the order of the tolerance used in stdlib_qlasq2 + ! this is an estimated error, the worst case bound is 4*n*eps + ! which is usually too large and requires unnecessary work to be + ! done by bisection when computing the eigenvectors + rtol = log(real(in,KIND=qp)) * four * eps + j = ibegin + do i = 1, in - 1 + work( 2*i-1 ) = abs( d( j ) ) + work( 2*i ) = e( j )*e( j )*work( 2*i-1 ) + j = j + 1 + end do + work( 2*in-1 ) = abs( d( iend ) ) + work( 2*in ) = zero + call stdlib_qlasq2( in, work, iinfo ) + if( iinfo /= 0 ) then + ! if iinfo = -5 then an index is part of a tight cluster + ! and should be changed. the index is in iwork(1) and the + ! gap is in work(n+1) + info = -5 + return + else + ! test that all eigenvalues are positive as expected + do i = 1, in + if( work( i )zero ) then + do i = indl, indu + m = m + 1 + w( m ) = work( in-i+1 ) + iblock( m ) = jblk + indexw( m ) = i + end do + else + do i = indl, indu + m = m + 1 + w( m ) = -work( i ) + iblock( m ) = jblk + indexw( m ) = i + end do + end if + do i = m - mb + 1, m + ! the value of rtol below should be the tolerance in stdlib_qlasq2 + werr( i ) = rtol * abs( w(i) ) + end do + do i = m - mb + 1, m - 1 + ! compute the right gap between the intervals + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( m ) = max( zero,( vu-sigma ) - ( w( m ) + werr( m ) ) ) + end if + ! proceed with next block + ibegin = iend + 1 + wbegin = wend + 1 + end do loop_170 + return + end subroutine stdlib_qlarre + + !> Given the initial representation L D L^T and its cluster of close + !> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... + !> W( CLEND ), DLARRF: finds a new relatively robust representation + !> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the + !> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. + + pure subroutine stdlib_qlarrf( n, d, l, ld, clstrt, clend,w, wgap, werr,spdiam, clgapl, & + clgapr, pivmin, sigma,dplus, lplus, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: clstrt, clend, n + integer(ilp), intent(out) :: info + real(qp), intent(in) :: clgapl, clgapr, pivmin, spdiam + real(qp), intent(out) :: sigma + ! Array Arguments + real(qp), intent(in) :: d(*), l(*), ld(*), w(*), werr(*) + real(qp), intent(out) :: dplus(*), lplus(*), work(*) + real(qp), intent(inout) :: wgap(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: quart = 0.25_qp + real(qp), parameter :: maxgrowth1 = 8._qp + real(qp), parameter :: maxgrowth2 = 8._qp + integer(ilp), parameter :: ktrymax = 1 + integer(ilp), parameter :: sleft = 1 + integer(ilp), parameter :: sright = 2 + + ! Local Scalars + logical(lk) :: dorrr1, forcer, nofail, sawnan1, sawnan2, tryrrr1 + integer(ilp) :: i, indx, ktry, shift + real(qp) :: avgap, bestshift, clwdth, eps, fact, fail, fail2, growthbound, ldelta, & + ldmax, lsigma, max1, max2, mingap, oldp, prod, rdelta, rdmax, rrr1, rrr2, rsigma, s, & + smlgrowth, tmp, znm2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + fact = real(2**ktrymax,KIND=qp) + eps = stdlib_qlamch( 'PRECISION' ) + shift = 0 + forcer = .false. + ! note that we cannot guarantee that for any of the shifts tried, + ! the factorization has a small or even moderate element growth. + ! there could be ritz values at both ends of the cluster and despite + ! backing off, there are examples where all factorizations tried + ! (in ieee mode, allowing zero pivots + ! element growth. + ! for this reason, we should use pivmin in this subroutine so that at + ! least the l d l^t factorization exists. it can be checked afterwards + ! whether the element growth caused bad residuals/orthogonality. + ! decide whether the code should accept the best among all + ! representations despite large element growth or signal info=1 + ! setting nofail to .false. for quick fix for bug 113 + nofail = .false. + ! compute the average gap length of the cluster + clwdth = abs(w(clend)-w(clstrt)) + werr(clend) + werr(clstrt) + avgap = clwdth / real(clend-clstrt,KIND=qp) + mingap = min(clgapl, clgapr) + ! initial values for shifts to both ends of cluster + lsigma = min(w( clstrt ),w( clend )) - werr( clstrt ) + rsigma = max(w( clstrt ),w( clend )) + werr( clend ) + ! use a small fudge to make sure that we really shift to the outside + lsigma = lsigma - abs(lsigma)* four * eps + rsigma = rsigma + abs(rsigma)* four * eps + ! compute upper bounds for how much to back off the initial shifts + ldmax = quart * mingap + two * pivmin + rdmax = quart * mingap + two * pivmin + ldelta = max(avgap,wgap( clstrt ))/fact + rdelta = max(avgap,wgap( clend-1 ))/fact + ! initialize the record of the best representation found + s = stdlib_qlamch( 'S' ) + smlgrowth = one / s + fail = real(n-1,KIND=qp)*mingap/(spdiam*eps) + fail2 = real(n-1,KIND=qp)*mingap/(spdiam*sqrt(eps)) + bestshift = lsigma + ! while (ktry <= ktrymax) + ktry = 0 + growthbound = maxgrowth1*spdiam + 5 continue + sawnan1 = .false. + sawnan2 = .false. + ! ensure that we do not back off too much of the initial shifts + ldelta = min(ldmax,ldelta) + rdelta = min(rdmax,rdelta) + ! compute the element growth when shifting to both ends of the cluster + ! accept the shift if there is no element growth at one of the two ends + ! left end + s = -lsigma + dplus( 1 ) = d( 1 ) + s + if(abs(dplus(1)) Given the initial eigenvalue approximations of T, DLARRJ: + !> does bisection to refine the eigenvalues of T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses in WERR. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_qlarrj( n, d, e2, ifirst, ilast,rtol, offset, w, werr, work, iwork,& + pivmin, spdiam, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset + integer(ilp), intent(out) :: info + real(qp), intent(in) :: pivmin, rtol, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: d(*), e2(*) + real(qp), intent(inout) :: w(*), werr(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: cnt, i, i1, i2, ii, iter, j, k, next, nint, olnint, p, prev, & + savi1 + real(qp) :: dplus, fac, left, mid, right, s, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + i2 = ilast + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + loop_75: do i = i1, i2 + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + mid = w(ii) + right = w( ii ) + werr( ii ) + width = right - mid + tmp = max( abs( left ), abs( right ) ) + ! the following test prevents the test of converged intervals + if( width=i1).and.(i<=i2)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + ! make sure that [left,right] contains the desired eigenvalue + ! do while( cnt(left)>i-1 ) + fac = one + 20 continue + cnt = 0 + s = left + dplus = d( 1 ) - s + if( dplusi-1 ) then + left = left - werr( ii )*fac + fac = two*fac + go to 20 + end if + ! do while( cnt(right)0 ), i.e. there are still unconverged intervals + ! and while (iter=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + cnt = 0 + s = mid + dplus = d( 1 ) - s + if( dplus0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = savi1, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + return + end subroutine stdlib_qlarrj + + !> DLARRK: computes one eigenvalue of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from DSTEMR. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_qlarrk( n, iw, gl, gu,d, e2, pivmin, reltol, w, werr, info) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: iw, n + real(qp), intent(in) :: pivmin, reltol, gl, gu + real(qp), intent(out) :: w, werr + ! Array Arguments + real(qp), intent(in) :: d(*), e2(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: fudge = two + + ! Local Scalars + integer(ilp) :: i, it, itmax, negcnt + real(qp) :: atoli, eps, left, mid, right, rtoli, tmp1, tmp2, tnorm + ! Intrinsic Functions + intrinsic :: abs,int,log,max + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! get machine constants + eps = stdlib_qlamch( 'P' ) + tnorm = max( abs( gl ), abs( gu ) ) + rtoli = reltol + atoli = fudge*two*pivmin + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + info = -1 + left = gl - fudge*tnorm*eps*n - fudge*two*pivmin + right = gu + fudge*tnorm*eps*n + fudge*two*pivmin + it = 0 + 10 continue + ! check if interval converged or maximum number of iterations reached + tmp1 = abs( right - left ) + tmp2 = max( abs(right), abs(left) ) + if( tmp1itmax)goto 30 + ! count number of negative pivots for mid-point + it = it + 1 + mid = half * (left + right) + negcnt = 0 + tmp1 = d( 1 ) - mid + if( abs( tmp1 )=iw) then + right = mid + else + left = mid + endif + goto 10 + 30 continue + ! converged or maximum number of iterations reached + w = half * (left + right) + werr = half * abs( right - left ) + return + end subroutine stdlib_qlarrk + + !> Perform tests to decide whether the symmetric tridiagonal matrix T + !> warrants expensive computations which guarantee high relative accuracy + !> in the eigenvalues. + + pure subroutine stdlib_qlarrr( n, d, e, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(in) :: d(*) + real(qp), intent(inout) :: e(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: relcond = 0.999_qp + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: yesrel + real(qp) :: eps, safmin, smlnum, rmin, tmp, tmp2, offdig, offdig2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! as a default, do not go for relative-accuracy preserving computations. + info = 1 + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + rmin = sqrt( smlnum ) + ! tests for relative accuracy + ! test for scaled diagonal dominance + ! scale the diagonal entries to one and check whether the sum of the + ! off-diagonals is less than one + ! the sdd relative error bounds have a 1/(1- 2*x) factor in them, + ! x = max(offdig + offdig2), so when x is close to 1/2, no relative + ! accuracy is promised. in the notation of the code fragment below, + ! 1/(1 - (offdig + offdig2)) is the condition number. + ! we don't think it is worth going into "sdd mode" unless the relative + ! condition number is reasonable, not 1/macheps. + ! the threshold should be compatible with other thresholds used in the + ! code. we set offdig + offdig2 <= .999_qp =: relcond, it corresponds + ! to losing at most 3 decimal digits: 1 / (1 - (offdig + offdig2)) <= 1000 + ! instead of the current offdig + offdig2 < 1 + yesrel = .true. + offdig = zero + tmp = sqrt(abs(d(1))) + if (tmp=relcond) yesrel = .false. + if(.not.yesrel) goto 11 + tmp = tmp2 + offdig = offdig2 + end do + 11 continue + if( yesrel ) then + info = 0 + return + else + endif + ! *** more to be implemented *** + ! test if the lower bidiagonal matrix l from t = l d l^t + ! (zero shift facto) is well conditioned + ! test if the upper bidiagonal matrix u from t = u d u^t + ! (zero shift facto) is well conditioned. + ! in this case, the matrix needs to be flipped and, at the end + ! of the eigenvector computation, the flip needs to be applied + ! to the computed eigenvectors (and the support) + return + end subroutine stdlib_qlarrr + + !> DLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by DLARRE. + + pure subroutine stdlib_qlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(qp), intent(in) :: minrgp, pivmin, vl, vu + real(qp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(qp), intent(in) :: gers(*) + real(qp), intent(out) :: work(*) + real(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + real(qp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indwrk= 3*n+1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = one + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_qcopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + call stdlib_qcopy( in, z( ibegin, j ), 1, d( ibegin ), 1 ) + call stdlib_qcopy( in-1, z( ibegin, j+1 ), 1, l( ibegin ),1 ) + sigma = z( iend, j+1 ) + ! set the corresponding entries in z to zero + call stdlib_qlaset( 'FULL', in, 2, zero, zero,z( ibegin, j), ldz ) + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_qlarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_qlarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_qlarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_qlarrf needs lwork = 2*n + call stdlib_qlarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,z(ibegin, newftt),z(ibegin, newftt+1),work( indwrk ), & + iinfo ) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_qlarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = ssigma + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=qp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_qlarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_qlar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_qscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex ! + !> + !> DLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -S C ] [ G ] [ 0 ] + !> where C**2 + S**2 = 1. + !> The mathematical formulas used for C and S are + !> R = sign(F) * sqrt(F**2 + G**2) + !> C = F / R + !> S = G / R + !> Hence C >= 0. The algorithm used to compute these quantities + !> incorporates scaling to avoid overflow or underflow in computing the + !> square root of the sum of squares. + !> This version is discontinuous in R at F = 0 but it returns the same + !> C and S as ZLARTG for complex inputs (F,0) and (G,0). + !> This is a more accurate version of the BLAS1 routine DROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0 and (G .ne. 0), then C=0 and S=sign(1,G) without doing any + !> floating point operations (saves work in DBDSQR when + !> there are zeros on the diagonal). + !> If F exceeds G in magnitude, C will be positive. + !> Below, wp=>dp stands for quad precision from LA_CONSTANTS module. + + pure subroutine stdlib_qlartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(qp), intent(out) :: c, r, s + real(qp), intent(in) :: f, g + ! Local Scalars + real(qp) :: d, f1, fs, g1, gs, p, u, uu + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + f1 = abs( f ) + g1 = abs( g ) + if( g == zero ) then + c = one + s = zero + r = f + else if( f == zero ) then + c = zero + s = sign( one, g ) + r = g1 + else if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) & + then + d = sqrt( f*f + g*g ) + p = one / d + c = f1*p + s = g*sign( p, f ) + r = sign( d, f ) + else + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + fs = f*uu + gs = g*uu + d = sqrt( fs*fs + gs*gs ) + p = one / d + c = abs( fs )*p + s = gs*sign( p, f ) + r = sign( d, f )*u + end if + return + end subroutine stdlib_qlartg + + !> DLARTGP: generates a plane rotation so that + !> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + !> [ -SN CS ] [ G ] [ 0 ] + !> This is a slower, more accurate version of the Level 1 BLAS routine DROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then CS=(+/-)1 and SN=0. + !> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. + !> The sign is chosen so that R >= 0. + + pure subroutine stdlib_qlartgp( f, g, cs, sn, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(out) :: cs, r, sn + real(qp), intent(in) :: f, g + ! ===================================================================== + + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i + real(qp) :: eps, f1, g1, safmin, safmn2, safmx2, scale + ! Intrinsic Functions + intrinsic :: abs,int,log,max,sign,sqrt + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Executable Statements + ! if( first ) then + safmin = stdlib_qlamch( 'S' ) + eps = stdlib_qlamch( 'E' ) + safmn2 = stdlib_qlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_qlamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! first = .false. + ! end if + if( g==zero ) then + cs = sign( one, f ) + sn = zero + r = abs( f ) + else if( f==zero ) then + cs = zero + sn = sign( one, g ) + r = abs( g ) + else + f1 = f + g1 = g + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 ) then + count = 0 + 10 continue + count = count + 1 + f1 = f1*safmn2 + g1 = g1*safmn2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 .and. count < 20 )go to 10 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmx2 + end do + else if( scale<=safmn2 ) then + count = 0 + 30 continue + count = count + 1 + f1 = f1*safmx2 + g1 = g1*safmx2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale<=safmn2 )go to 30 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmn2 + end do + else + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + end if + if( r DLARTGS: generates a plane rotation designed to introduce a bulge in + !> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD + !> problem. X and Y are the top-row entries, and SIGMA is the shift. + !> The computed CS and SN define a plane rotation satisfying + !> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], + !> [ -SN CS ] [ X * Y ] [ 0 ] + !> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the + !> rotation is by PI/2. + + pure subroutine stdlib_qlartgs( x, y, sigma, cs, sn ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(out) :: cs, sn + real(qp), intent(in) :: sigma, x, y + ! =================================================================== + + ! Local Scalars + real(qp) :: r, s, thresh, w, z + thresh = stdlib_qlamch('E') + ! compute the first column of b**t*b - sigma^2*i, up to a scale + ! factor. + if( (sigma == zero .and. abs(x) < thresh) .or.(abs(x) == sigma .and. y == zero) ) & + then + z = zero + w = zero + else if( sigma == zero ) then + if( x >= zero ) then + z = x + w = y + else + z = -x + w = -y + end if + else if( abs(x) < thresh ) then + z = -sigma*sigma + w = zero + else + if( x >= zero ) then + s = one + else + s = negone + end if + z = s * (abs(x)-sigma) * (s+sigma/x) + w = s * y + end if + ! generate the rotation. + ! call stdlib_qlartgp( z, w, cs, sn, r ) might seem more natural; + ! reordering the arguments ensures that if z = 0 then the rotation + ! is by pi/2. + call stdlib_qlartgp( w, z, sn, cs, r ) + return + ! end stdlib_qlartgs + end subroutine stdlib_qlartgs + + !> DLARTV: applies a vector of real plane rotations to elements of the + !> real vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -s(i) c(i) ) ( y(i) ) + + pure subroutine stdlib_qlartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(qp), intent(in) :: c(*), s(*) + real(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(qp) :: xi, yi + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - s( ic )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_qlartv + + !> DLARUV: returns a vector of n random real numbers from a uniform (0,1) + !> distribution (n <= 128). + !> This is an auxiliary routine called by DLARNV and ZLARNV. + + pure subroutine stdlib_qlaruv( iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(qp), intent(out) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + integer(ilp), parameter :: ipw2 = 4096 + real(qp), parameter :: r = one/ipw2 + + + + ! Local Scalars + integer(ilp) :: i, i1, i2, i3, i4, it1, it2, it3, it4, j + ! Local Arrays + integer(ilp) :: mm(lv,4) + ! Intrinsic Functions + intrinsic :: real,min,mod + ! Data Statements + mm(1,1:4)=[494,322,2508,2549] + mm(2,1:4)=[2637,789,3754,1145] + mm(3,1:4)=[255,1440,1766,2253] + mm(4,1:4)=[2008,752,3572,305] + mm(5,1:4)=[1253,2859,2893,3301] + mm(6,1:4)=[3344,123,307,1065] + mm(7,1:4)=[4084,1848,1297,3133] + mm(8,1:4)=[1739,643,3966,2913] + mm(9,1:4)=[3143,2405,758,3285] + mm(10,1:4)=[3468,2638,2598,1241] + mm(11,1:4)=[688,2344,3406,1197] + mm(12,1:4)=[1657,46,2922,3729] + mm(13,1:4)=[1238,3814,1038,2501] + mm(14,1:4)=[3166,913,2934,1673] + mm(15,1:4)=[1292,3649,2091,541] + mm(16,1:4)=[3422,339,2451,2753] + mm(17,1:4)=[1270,3808,1580,949] + mm(18,1:4)=[2016,822,1958,2361] + mm(19,1:4)=[154,2832,2055,1165] + mm(20,1:4)=[2862,3078,1507,4081] + mm(21,1:4)=[697,3633,1078,2725] + mm(22,1:4)=[1706,2970,3273,3305] + mm(23,1:4)=[491,637,17,3069] + mm(24,1:4)=[931,2249,854,3617] + mm(25,1:4)=[1444,2081,2916,3733] + mm(26,1:4)=[444,4019,3971,409] + mm(27,1:4)=[3577,1478,2889,2157] + mm(28,1:4)=[3944,242,3831,1361] + mm(29,1:4)=[2184,481,2621,3973] + mm(30,1:4)=[1661,2075,1541,1865] + mm(31,1:4)=[3482,4058,893,2525] + mm(32,1:4)=[657,622,736,1409] + mm(33,1:4)=[3023,3376,3992,3445] + mm(34,1:4)=[3618,812,787,3577] + mm(35,1:4)=[1267,234,2125,77] + mm(36,1:4)=[1828,641,2364,3761] + mm(37,1:4)=[164,4005,2460,2149] + mm(38,1:4)=[3798,1122,257,1449] + mm(39,1:4)=[3087,3135,1574,3005] + mm(40,1:4)=[2400,2640,3912,225] + mm(41,1:4)=[2870,2302,1216,85] + mm(42,1:4)=[3876,40,3248,3673] + mm(43,1:4)=[1905,1832,3401,3117] + mm(44,1:4)=[1593,2247,2124,3089] + mm(45,1:4)=[1797,2034,2762,1349] + mm(46,1:4)=[1234,2637,149,2057] + mm(47,1:4)=[3460,1287,2245,413] + mm(48,1:4)=[328,1691,166,65] + mm(49,1:4)=[2861,496,466,1845] + mm(50,1:4)=[1950,1597,4018,697] + mm(51,1:4)=[617,2394,1399,3085] + mm(52,1:4)=[2070,2584,190,3441] + mm(53,1:4)=[3331,1843,2879,1573] + mm(54,1:4)=[769,336,153,3689] + mm(55,1:4)=[1558,1472,2320,2941] + mm(56,1:4)=[2412,2407,18,929] + mm(57,1:4)=[2800,433,712,533] + mm(58,1:4)=[189,2096,2159,2841] + mm(59,1:4)=[287,1761,2318,4077] + mm(60,1:4)=[2045,2810,2091,721] + mm(61,1:4)=[1227,566,3443,2821] + mm(62,1:4)=[2838,442,1510,2249] + mm(63,1:4)=[209,41,449,2397] + mm(64,1:4)=[2770,1238,1956,2817] + mm(65,1:4)=[3654,1086,2201,245] + mm(66,1:4)=[3993,603,3137,1913] + mm(67,1:4)=[192,840,3399,1997] + mm(68,1:4)=[2253,3168,1321,3121] + mm(69,1:4)=[3491,1499,2271,997] + mm(70,1:4)=[2889,1084,3667,1833] + mm(71,1:4)=[2857,3438,2703,2877] + mm(72,1:4)=[2094,2408,629,1633] + mm(73,1:4)=[1818,1589,2365,981] + mm(74,1:4)=[688,2391,2431,2009] + mm(75,1:4)=[1407,288,1113,941] + mm(76,1:4)=[634,26,3922,2449] + mm(77,1:4)=[3231,512,2554,197] + mm(78,1:4)=[815,1456,184,2441] + mm(79,1:4)=[3524,171,2099,285] + mm(80,1:4)=[1914,1677,3228,1473] + mm(81,1:4)=[516,2657,4012,2741] + mm(82,1:4)=[164,2270,1921,3129] + mm(83,1:4)=[303,2587,3452,909] + mm(84,1:4)=[2144,2961,3901,2801] + mm(85,1:4)=[3480,1970,572,421] + mm(86,1:4)=[119,1817,3309,4073] + mm(87,1:4)=[3357,676,3171,2813] + mm(88,1:4)=[837,1410,817,2337] + mm(89,1:4)=[2826,3723,3039,1429] + mm(90,1:4)=[2332,2803,1696,1177] + mm(91,1:4)=[2089,3185,1256,1901] + mm(92,1:4)=[3780,184,3715,81] + mm(93,1:4)=[1700,663,2077,1669] + mm(94,1:4)=[3712,499,3019,2633] + mm(95,1:4)=[150,3784,1497,2269] + mm(96,1:4)=[2000,1631,1101,129] + mm(97,1:4)=[3375,1925,717,1141] + mm(98,1:4)=[1621,3912,51,249] + mm(99,1:4)=[3090,1398,981,3917] + mm(100,1:4)=[3765,1349,1978,2481] + mm(101,1:4)=[1149,1441,1813,3941] + mm(102,1:4)=[3146,2224,3881,2217] + mm(103,1:4)=[33,2411,76,2749] + mm(104,1:4)=[3082,1907,3846,3041] + mm(105,1:4)=[2741,3192,3694,1877] + mm(106,1:4)=[359,2786,1682,345] + mm(107,1:4)=[3316,382,124,2861] + mm(108,1:4)=[1749,37,1660,1809] + mm(109,1:4)=[185,759,3997,3141] + mm(110,1:4)=[2784,2948,479,2825] + mm(111,1:4)=[2202,1862,1141,157] + mm(112,1:4)=[2199,3802,886,2881] + mm(113,1:4)=[1364,2423,3514,3637] + mm(114,1:4)=[1244,2051,1301,1465] + mm(115,1:4)=[2020,2295,3604,2829] + mm(116,1:4)=[3160,1332,1888,2161] + mm(117,1:4)=[2785,1832,1836,3365] + mm(118,1:4)=[2772,2405,1990,361] + mm(119,1:4)=[1217,3638,2058,2685] + mm(120,1:4)=[1822,3661,692,3745] + mm(121,1:4)=[1245,327,1194,2325] + mm(122,1:4)=[2252,3660,20,3609] + mm(123,1:4)=[3904,716,3285,3821] + mm(124,1:4)=[2774,1842,2046,3537] + mm(125,1:4)=[997,3987,2107,517] + mm(126,1:4)=[2573,1368,3508,3017] + mm(127,1:4)=[1148,1848,3525,2141] + mm(128,1:4)=[545,2366,3801,1537] + ! Executable Statements + i1 = iseed( 1 ) + i2 = iseed( 2 ) + i3 = iseed( 3 ) + i4 = iseed( 4 ) + loop_10: do i = 1, min( n, lv ) + 20 continue + ! multiply the seed by i-th power of the multiplier modulo 2**48 + it4 = i4*mm( i, 4 ) + it3 = it4 / ipw2 + it4 = it4 - ipw2*it3 + it3 = it3 + i3*mm( i, 4 ) + i4*mm( i, 3 ) + it2 = it3 / ipw2 + it3 = it3 - ipw2*it2 + it2 = it2 + i2*mm( i, 4 ) + i3*mm( i, 3 ) + i4*mm( i, 2 ) + it1 = it2 / ipw2 + it2 = it2 - ipw2*it1 + it1 = it1 + i1*mm( i, 4 ) + i2*mm( i, 3 ) + i3*mm( i, 2 ) +i4*mm( i, 1 ) + it1 = mod( it1, ipw2 ) + ! convert 48-bit integer to a realnumber in the interval (0,1,KIND=qp) + x( i ) = r*( real( it1,KIND=qp)+r*( real( it2,KIND=qp)+r*( real( it3,KIND=qp)+& + r*real( it4,KIND=qp) ) ) ) + if (x( i )==1.0_qp) then + ! if a real number has n bits of precision, and the first + ! n bits of the 48-bit integer above happen to be all 1 (which + ! will occur about once every 2**n calls), then x( i ) will + ! be rounded to exactly one. + ! since x( i ) is not supposed to return exactly 0.0_qp or 1.0_qp, + ! the statistically correct thing to do in this situation is + ! simply to iterate again. + ! n.b. the case x( i ) = 0.0_qp should not be possible. + i1 = i1 + 2 + i2 = i2 + 2 + i3 = i3 + 2 + i4 = i4 + 2 + goto 20 + end if + end do loop_10 + ! return final value of seed + iseed( 1 ) = it1 + iseed( 2 ) = it2 + iseed( 3 ) = it3 + iseed( 4 ) = it4 + return + end subroutine stdlib_qlaruv + + !> DLARZ: applies a real elementary reflector H to a real M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> H is a product of k elementary reflectors as returned by DTZRZF. + + pure subroutine stdlib_qlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + real(qp), intent(in) :: tau + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*) + real(qp), intent(in) :: v(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=zero ) then + ! w( 1:n ) = c( 1, 1:n ) + call stdlib_qcopy( n, c, ldc, work, 1 ) + ! w( 1:n ) = w( 1:n ) + c( m-l+1:m, 1:n )**t * v( 1:l ) + call stdlib_qgemv( 'TRANSPOSE', l, n, one, c( m-l+1, 1 ), ldc, v,incv, one, work,& + 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_qaxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**t + call stdlib_qger( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=zero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_qcopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_qgemv( 'NO TRANSPOSE', m, l, one, c( 1, n-l+1 ), ldc,v, incv, one, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_qaxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**t + call stdlib_qger( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_qlarz + + !> DLARZB: applies a real block reflector H or its transpose H**T to + !> a real distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_qlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + real(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**t + do j = 1, k + call stdlib_qcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**t * v( 1:k, 1:l )**t + if( l>0 )call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, l, one,c( m-l+1, 1 ), & + ldc, v, ldv, one, work, ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_qtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, one, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**t + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**t * w( 1:n, 1:k )**t + if( l>0 )call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -one, v, ldv,work, & + ldwork, one, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_qcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**t + if( l>0 )call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, one,c( 1, n-l+1 ),& + ldc, v, ldv, one, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * t or w( 1:m, 1:k ) * t**t + call stdlib_qtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, one, t,ldt, work, & + ldwork ) + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * v( 1:k, 1:l ) + if( l>0 )call stdlib_qgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -one,work, & + ldwork, v, ldv, one, c( 1, n-l+1 ), ldc ) + end if + return + end subroutine stdlib_qlarzb + + !> DLARZT: forms the triangular factor T of a real block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_qlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(qp), intent(out) :: t(ldt,*) + real(qp), intent(in) :: tau(*) + real(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i DLAS2: computes the singular values of the 2-by-2 matrix + !> [ F G ] + !> [ 0 H ]. + !> On return, SSMIN is the smaller singular value and SSMAX is the + !> larger singular value. + + pure subroutine stdlib_qlas2( f, g, h, ssmin, ssmax ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: f, g, h + real(qp), intent(out) :: ssmax, ssmin + ! ==================================================================== + + + + ! Local Scalars + real(qp) :: as, at, au, c, fa, fhmn, fhmx, ga, ha + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + fa = abs( f ) + ga = abs( g ) + ha = abs( h ) + fhmn = min( fa, ha ) + fhmx = max( fa, ha ) + if( fhmn==zero ) then + ssmin = zero + if( fhmx==zero ) then + ssmax = ga + else + ssmax = max( fhmx, ga )*sqrt( one+( min( fhmx, ga ) / max( fhmx, ga ) )**2 ) + + end if + else + if( ga DLASCL: multiplies the M by N real matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_qlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(qp), intent(in) :: cfrom, cto + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(qp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_qisnan(cfrom) ) then + info = -4 + else if( stdlib_qisnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower half of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper half of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_qlascl + + !> Using a divide and conquer approach, DLASD0: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M + !> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + !> The algorithm computes orthogonal matrices U and VT such that + !> B = U * S * VT. The singular values S are overwritten on D. + !> A related subroutine, DLASDA, computes only the singular values, + !> and optionally, the singular vectors in compact form. + + pure subroutine stdlib_qlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n, smlsiz, sqre + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei + real(qp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + end if + m = n + sqre + if( ldu DLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B, + !> where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + !> A related subroutine DLASD7 handles the case in which the singular + !> values (and the singular vectors in factored form) are desired. + !> DLASD1 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The left singular vectors of the original matrix are stored in U, and + !> the transpose of the right singular vectors are stored in VT, and the + !> singular values are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or when there are zeros in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD2. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the square roots of the + !> roots of the secular equation via the routine DLASD4 (as called + !> by DLASD3). This routine also calculates the singular vectors of + !> the current problem. + !> The final stage consists of computing the updated singular vectors + !> directly using the updated singular values. The singular vectors + !> for the current problem are multiplied with the singular vectors + !> from the overall problem. + + pure subroutine stdlib_qlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, nl, nr, sqre + real(qp), intent(inout) :: alpha, beta + ! Array Arguments + integer(ilp), intent(inout) :: idxq(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & + ldvt2, m, n, n1, n2 + real(qp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DLASD1', -info ) + return + end if + n = nl + nr + 1 + m = n + sqre + ! the following values are for bookkeeping purposes only. they are + ! integer pointers which indicate the portion of the workspace + ! used by a particular array in stdlib_qlasd2 and stdlib_qlasd3. + ldu2 = n + ldvt2 = m + iz = 1 + isigma = iz + m + iu2 = isigma + n + ivt2 = iu2 + ldu2*n + iq = ivt2 + ldvt2*m + idx = 1 + idxc = idx + n + coltyp = idxc + n + idxp = coltyp + n + ! scale. + orgnrm = max( abs( alpha ), abs( beta ) ) + d( nl+1 ) = zero + do i = 1, n + if( abs( d( i ) )>orgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! deflate singular values. + call stdlib_qlasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& + isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & + idxc ), idxq, iwork( coltyp ), info ) + ! solve secular equation and update singular vectors. + ldq = k + call stdlib_qlasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & + iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& + info ) + ! report the convergence failure. + if( info/=0 ) then + return + end if + ! unscale. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_qlamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_qlasd1 + + !> DLASD2: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> singular values are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + !> DLASD2 is called from DLASD1. + + pure subroutine stdlib_qlasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & + u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + real(qp), intent(in) :: alpha, beta + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) + integer(ilp), intent(inout) :: idxq(*) + real(qp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(qp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) + ! ===================================================================== + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(qp) :: c, eps, hlftol, s, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + if( ldun )go to 110 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + coltyp( j ) = 4 + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_qlapy2( c, s ) + c = c / tau + s = -s / tau + z( j ) = tau + z( jprev ) = zero + ! apply back the givens rotation to the left and right + ! singular vector matrices. + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_qrot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s ) + call stdlib_qrot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,s ) + if( coltyp( j )/=coltyp( jprev ) ) then + coltyp( j ) = 3 + end if + coltyp( jprev ) = 4 + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 100 + 110 continue + ! record the last singular value. + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 120 continue + ! count up the total number of the various types of columns, then + ! form a permutation which positions the four column types into + ! four groups of uniform structure (although one or more of these + ! groups may be empty). + do j = 1, 4 + ctot( j ) = 0 + end do + do j = 2, n + ct = coltyp( j ) + ctot( ct ) = ctot( ct ) + 1 + end do + ! psm(*) = position in submatrix (of types 1 through 4) + psm( 1 ) = 2 + psm( 2 ) = 2 + ctot( 1 ) + psm( 3 ) = psm( 2 ) + ctot( 2 ) + psm( 4 ) = psm( 3 ) + ctot( 3 ) + ! fill out the idxc array so that the permutation which it induces + ! will place all type-1 columns first, all type-2 columns next, + ! then all type-3's, and finally all type-4's, starting from the + ! second column. this applies similarly to the rows of vt. + do j = 2, n + jp = idxp( j ) + ct = coltyp( jp ) + idxc( psm( ct ) ) = j + psm( ct ) = psm( ct ) + 1 + end do + ! sort the singular values and corresponding singular vectors into + ! dsigma, u2, and vt2 respectively. the singular values/vectors + ! which were not deflated go into the first k slots of dsigma, u2, + ! and vt2 respectively, while those which were deflated go into the + ! last n - k slots, except that the first column/row will be treated + ! separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + idxj = idxq( idx( idxp( idxc( j ) ) )+1 ) + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_qcopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 ) + call stdlib_qcopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 ) + end do + ! determine dsigma(1), dsigma(2) and z(1) + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_qlapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = z( m ) / z( 1 ) + end if + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! move the rest of the updating row to z. + call stdlib_qcopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 ) + ! determine the first column of u2, the first row of vt2 and the + ! last row of vt. + call stdlib_qlaset( 'A', n, 1, zero, zero, u2, ldu2 ) + u2( nlp1, 1 ) = one + if( m>n ) then + do i = 1, nlp1 + vt( m, i ) = -s*vt( nlp1, i ) + vt2( 1, i ) = c*vt( nlp1, i ) + end do + do i = nlp2, m + vt2( 1, i ) = s*vt( m, i ) + vt( m, i ) = c*vt( m, i ) + end do + else + call stdlib_qcopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 ) + end if + if( m>n ) then + call stdlib_qcopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 ) + end if + ! the deflated singular values and their corresponding vectors go + ! into the back of d, u, and v respectively. + if( n>k ) then + call stdlib_qcopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + call stdlib_qlacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),ldu ) + call stdlib_qlacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),ldvt ) + end if + ! copy ctot into coltyp for referencing in stdlib_qlasd3. + do j = 1, 4 + coltyp( j ) = ctot( j ) + end do + return + end subroutine stdlib_qlasd2 + + !> DLASD3: finds all the square roots of the roots of the secular + !> equation, as defined by the values in D and Z. It makes the + !> appropriate calls to DLASD4 and then updates the singular + !> vectors by matrix multiplication. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> DLASD3 is called from DLASD1. + + pure subroutine stdlib_qlasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& + vt2, ldvt2, idxc, ctot, z,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), idxc(*) + real(qp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) + real(qp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) + real(qp), intent(in) :: u2(ldu2,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 + real(qp) :: rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + nlp1 = nl + 1 + nlp2 = nl + 2 + if( ( k<1 ) .or. ( k>n ) ) then + info = -4 + else if( ldqzero ) then + call stdlib_qcopy( n, u2( 1, 1 ), 1, u( 1, 1 ), 1 ) + else + do i = 1, n + u( i, 1 ) = -u2( i, 1 ) + end do + end if + return + end if + ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can + ! be computed with high relative accuracy (barring over/underflow). + ! this is a problem on machines without a guard digit in + ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). + ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), + ! which on any of these machines zeros out the bottommost + ! bit of dsigma(i) if it is 1; this makes the subsequent + ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation + ! occurs. on binary machines with a guard digit (almost all + ! machines) it does not change dsigma(i) at all. on hexadecimal + ! and decimal machines with a guard digit, it slightly + ! changes the bottommost bits of dsigma(i). it does not account + ! for hexadecimal or decimal machines without guard digits + ! (we know of none). we use a subroutine call to compute + ! 2*dsigma(i) to prevent optimizing compilers from eliminating + ! this code. + do i = 1, k + dsigma( i ) = stdlib_qlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) + end do + ! keep a copy of z. + call stdlib_qcopy( k, z, 1, q, 1 ) + ! normalize z. + rho = stdlib_qnrm2( k, z, 1 ) + call stdlib_qlascl( 'G', 0, 0, rho, one, k, 1, z, k, info ) + rho = rho*rho + ! find the new singular values. + do j = 1, k + call stdlib_qlasd4( k, j, dsigma, z, u( 1, j ), rho, d( j ),vt( 1, j ), info ) + + ! if the zero finder fails, report the convergence failure. + if( info/=0 ) then + return + end if + end do + ! compute updated z. + do i = 1, k + z( i ) = u( i, k )*vt( i, k ) + do j = 1, i - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & + )+dsigma( j ) ) ) + end do + do j = i, k - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & + i )+dsigma( j+1 ) ) ) + end do + z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1 ) ) + end do + ! compute left singular vectors of the modified diagonal matrix, + ! and store related information for the right singular vectors. + do i = 1, k + vt( 1, i ) = z( 1 ) / u( 1, i ) / vt( 1, i ) + u( 1, i ) = negone + do j = 2, k + vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) + u( j, i ) = dsigma( j )*vt( j, i ) + end do + temp = stdlib_qnrm2( k, u( 1, i ), 1 ) + q( 1, i ) = u( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( j, i ) = u( jc, i ) / temp + end do + end do + ! update the left singular vector matrix. + if( k==2 ) then + call stdlib_qgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) + go to 100 + end if + if( ctot( 1 )>0 ) then + call stdlib_qgemm( 'N', 'N', nl, k, ctot( 1 ), one, u2( 1, 2 ), ldu2,q( 2, 1 ), ldq,& + zero, u( 1, 1 ), ldu ) + if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_qgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( & + ktemp, 1 ), ldq, one, u( 1, 1 ), ldu ) + end if + else if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_qgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( ktemp, & + 1 ), ldq, zero, u( 1, 1 ), ldu ) + else + call stdlib_qlacpy( 'F', nl, k, u2, ldu2, u, ldu ) + end if + call stdlib_qcopy( k, q( 1, 1 ), ldq, u( nlp1, 1 ), ldu ) + ktemp = 2 + ctot( 1 ) + ctemp = ctot( 2 ) + ctot( 3 ) + call stdlib_qgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1 ), & + ldq, zero, u( nlp2, 1 ), ldu ) + ! generate the right singular vectors. + 100 continue + do i = 1, k + temp = stdlib_qnrm2( k, vt( 1, i ), 1 ) + q( i, 1 ) = vt( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( i, j ) = vt( jc, i ) / temp + end do + end do + ! update the right singular vector matrix. + if( k==2 ) then + call stdlib_qgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) + + return + end if + ktemp = 1 + ctot( 1 ) + call stdlib_qgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1, 1 ), ldq,vt2( 1, 1 ), ldvt2, & + zero, vt( 1, 1 ), ldvt ) + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + if( ktemp<=ldvt2 )call stdlib_qgemm( 'N', 'N', k, nlp1, ctot( 3 ), one, q( 1, ktemp ),& + ldq, vt2( ktemp, 1 ), ldvt2, one, vt( 1, 1 ),ldvt ) + ktemp = ctot( 1 ) + 1 + nrp1 = nr + sqre + if( ktemp>1 ) then + do i = 1, k + q( i, ktemp ) = q( i, 1 ) + end do + do i = nlp2, m + vt2( ktemp, i ) = vt2( 1, i ) + end do + end if + ctemp = 1 + ctot( 2 ) + ctot( 3 ) + call stdlib_qgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1, ktemp ), ldq,vt2( ktemp, nlp2 )& + , ldvt2, zero, vt( 1, nlp2 ), ldvt ) + return + end subroutine stdlib_qlasd3 + + !> This subroutine computes the square root of the I-th updated + !> eigenvalue of a positive symmetric rank-one modification to + !> a positive diagonal matrix whose entries are given as the squares + !> of the corresponding entries in the array d, and that + !> 0 <= D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) * diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_qlasd4( n, i, d, z, delta, rho, sigma, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(qp), intent(in) :: rho + real(qp), intent(out) :: sigma + ! Array Arguments + real(qp), intent(in) :: d(*), z(*) + real(qp), intent(out) :: delta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 400 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3, geomavg + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(qp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & + dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & + temp, temp1, temp2, w + ! Local Arrays + real(qp) :: dd(3), zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + sigma = sqrt( d( 1 )*d( 1 )+rho*z( 1 )*z( 1 ) ) + delta( 1 ) = one + work( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_qlasd5( i, d, z, delta, rho, sigma, work ) + return + end if + ! compute machine epsilon + eps = stdlib_qlamch( 'EPSILON' ) + rhoinv = one / rho + tau2= zero + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + temp = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) + do j = 1, n + work( j ) = d( j ) + d( n ) + temp1 + delta( j ) = ( d( j )-d( n ) ) - temp1 + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& + *work( n ) ) + if( w<=zero ) then + temp1 = sqrt( d( n )*d( n )+rho ) + temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& + temp1 ) ) ) +z( n )*z( n ) / rho + ! the following tau2 is to approximate + ! sigma_n^2 - d( n )*d( n ) + if( c<=temp ) then + tau = rho + else + delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) + a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*delsq + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp>rho )eta = rho + dtnsq + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + ! main loop to update the values of the array delta + iter = niter + 1 + loop_90: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + ! calculate the new step + dtnsq1 = work( n-1 )*delta( n-1 ) + dtnsq = work( n )*delta( n ) + c = w - dtnsq1*dpsi - dtnsq*dphi + a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) + b = dtnsq1*dtnsq*w + if( a>=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp<=zero )eta = eta / two + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + end do loop_90 + ! return with info = 1, niter = maxit and not converged + info = 1 + go to 240 + ! end for the case i = n + else + ! the case for i < n + niter = 1 + ip1 = i + 1 + ! calculate initial guess + delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) + delsq2 = delsq / two + sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) + temp = delsq2 / ( d( i )+sq2 ) + do j = 1, n + work( j ) = d( j ) + d( i ) + temp + delta( j ) = ( d( j )-d( i ) ) - temp + end do + psi = zero + do j = 1, i - 1 + psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + phi = zero + do j = n, i + 2, -1 + phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + c = rhoinv + psi + phi + w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& + *delta( ip1 ) ) + geomavg = .false. + if( w>zero ) then + ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + ! we choose d(i) as origin. + orgati = .true. + ii = i + sglb = zero + sgub = delsq2 / ( d( i )+sq2 ) + a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*delsq + if( a>zero ) then + tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + ! tau2 now is an estimation of sigma^2 - d( i )^2. the + ! following, however, is the corresponding estimation of + ! sigma - d( i ). + tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) + temp = sqrt(eps) + if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then + tau = min( ten*d(i), sgub ) + geomavg = .true. + end if + else + ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + ! we choose d(i+1) as origin. + orgati = .false. + ii = ip1 + sglb = -delsq2 / ( d( ii )+sq2 ) + sgub = zero + a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*delsq + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / ( work( ii )*delta( ii ) ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & + iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp = tau + eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + swtch = .false. + if( orgati ) then + if( -w>abs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + ! main loop to update the values of the array delta and work + iter = niter + 1 + loop_230: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( .not.swtch ) then + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + else + temp = z( ii ) / ( work( ii )*delta( ii ) ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - dtisq*dpsi - dtipsq*dphi + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) + end if + else + a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - dtiim*dpsi - dtiip*dphi + zz( 1 ) = dtiim*dtiim*dpsi + zz( 3 ) = dtiip*dtiip*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 + c = temp - dtiip*( dpsi+dphi ) - temp2 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp=tau+eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_230 + ! return with info = 1, niter = maxit and not converged + info = 1 + end if + 240 continue + return + end subroutine stdlib_qlasd4 + + !> This subroutine computes the square root of the I-th eigenvalue + !> of a positive symmetric rank-one modification of a 2-by-2 diagonal + !> matrix + !> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal entries in the array D are assumed to satisfy + !> 0 <= D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_qlasd5( i, d, z, delta, rho, dsigma, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(qp), intent(out) :: dsigma + real(qp), intent(in) :: rho + ! Array Arguments + real(qp), intent(in) :: d(2), z(2) + real(qp), intent(out) :: delta(2), work(2) + ! ===================================================================== + + ! Local Scalars + real(qp) :: b, c, del, delsq, tau, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + delsq = del*( d( 2 )+d( 1 ) ) + if( i==1 ) then + w = one + four*rho*( z( 2 )*z( 2 ) / ( d( 1 )+three*d( 2 ) )-z( 1 )*z( 1 ) / ( & + three*d( 1 )+d( 2 ) ) ) / del + if( w>zero ) then + b = delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*delsq + ! b > zero, always + ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + ! the following tau is dsigma - d( 1 ) + tau = tau / ( d( 1 )+sqrt( d( 1 )*d( 1 )+tau ) ) + dsigma = d( 1 ) + tau + delta( 1 ) = -tau + delta( 2 ) = del - tau + work( 1 ) = two*d( 1 ) + tau + work( 2 ) = ( d( 1 )+tau ) + d( 2 ) + ! delta( 1 ) = -z( 1 ) / tau + ! delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( abs( d( 2 )*d( 2 )+tau ) ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + end if + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( d( 2 )*d( 2 )+tau ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_qlasd5 + + !> DLASD6: computes the SVD of an updated upper bidiagonal matrix B + !> obtained by merging two smaller ones by appending a row. This + !> routine is used only for the problem which requires all singular + !> values and optionally singular vector matrices in factored form. + !> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + !> A related subroutine, DLASD1, handles the case in which all singular + !> values and singular vectors of the bidiagonal matrix are desired. + !> DLASD6 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The singular values of B can be computed using D1, D2, the first + !> components of all the right singular vectors of the lower block, and + !> the last components of all the right singular vectors of the upper + !> block. These components are stored and updated in VF and VL, + !> respectively, in DLASD6. Hence U and VT are not explicitly + !> referenced. + !> The singular values are stored in D. The algorithm consists of two + !> stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or if there is a zero + !> in the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLASD7. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the roots of the + !> secular equation via the routine DLASD4 (as called by DLASD8). + !> This routine also updates VF and VL and computes the distances + !> between the updated singular values and the old singular + !> values. + !> DLASD6 is called from DLASDA. + + pure subroutine stdlib_qlasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & + givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(qp), intent(inout) :: alpha, beta + real(qp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(qp), intent(inout) :: d(*), vf(*), vl(*) + real(qp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & + z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 + real(qp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcolorgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! sort and deflate singular values. + call stdlib_qlasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & + work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & + givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) + ! solve secular equation, compute difl, difr, and update vf, vl. + call stdlib_qlasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & + iw ), info ) + ! report the possible convergence failure. + if( info/=0 ) then + return + end if + ! save the poles if icompq = 1. + if( icompq==1 ) then + call stdlib_qcopy( k, d, 1, poles( 1, 1 ), 1 ) + call stdlib_qcopy( k, work( isigma ), 1, poles( 1, 2 ), 1 ) + end if + ! unscale. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_qlamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_qlasd6 + + !> DLASD7: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. There + !> are two ways in which deflation can occur: when two or more singular + !> values are close together or if there is a tiny entry in the Z + !> vector. For each such occurrence the order of the related + !> secular equation problem is reduced by one. + !> DLASD7 is called from DLASD6. + + pure subroutine stdlib_qlasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & + beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(qp), intent(in) :: alpha, beta + real(qp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(qp), intent(inout) :: d(*), vf(*), vl(*) + real(qp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) + + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(qp) :: eps, hlftol, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcoln )go to 90 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_qlapy2( c, s ) + z( j ) = tau + z( jprev ) = zero + c = c / tau + s = -s / tau + ! record the appropriate givens rotation + if( icompq==1 ) then + givptr = givptr + 1 + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + givcol( givptr, 2 ) = idxjp + givcol( givptr, 1 ) = idxj + givnum( givptr, 2 ) = c + givnum( givptr, 1 ) = s + end if + call stdlib_qrot( 1, vf( jprev ), 1, vf( j ), 1, c, s ) + call stdlib_qrot( 1, vl( jprev ), 1, vl( j ), 1, c, s ) + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 80 + 90 continue + ! record the last singular value. + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 100 continue + ! sort the singular values into dsigma. the singular values which + ! were not deflated go into the first k slots of dsigma, except + ! that dsigma(1) is treated separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + vfw( j ) = vf( jp ) + vlw( j ) = vl( jp ) + end do + if( icompq==1 ) then + do j = 2, n + jp = idxp( j ) + perm( j ) = idxq( idx( jp )+1 ) + if( perm( j )<=nlp1 ) then + perm( j ) = perm( j ) - 1 + end if + end do + end if + ! the deflated singular values go back into the last n - k slots of + ! d. + call stdlib_qcopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and + ! vl(m). + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_qlapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = -z( m ) / z( 1 ) + end if + call stdlib_qrot( 1, vf( m ), 1, vf( 1 ), 1, c, s ) + call stdlib_qrot( 1, vl( m ), 1, vl( 1 ), 1, c, s ) + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! restore z, vf, and vl. + call stdlib_qcopy( k-1, zw( 2 ), 1, z( 2 ), 1 ) + call stdlib_qcopy( n-1, vfw( 2 ), 1, vf( 2 ), 1 ) + call stdlib_qcopy( n-1, vlw( 2 ), 1, vl( 2 ), 1 ) + return + end subroutine stdlib_qlasd7 + + !> DLASD8: finds the square roots of the roots of the secular equation, + !> as defined by the values in DSIGMA and Z. It makes the appropriate + !> calls to DLASD4, and stores, for each element in D, the distance + !> to its two nearest poles (elements in DSIGMA). It also updates + !> the arrays VF and VL, the first and last components of all the + !> right singular vectors of the original bidiagonal matrix. + !> DLASD8 is called from DLASD6. + + pure subroutine stdlib_qlasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, k, lddifr + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) + real(qp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j + real(qp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( k<1 ) then + info = -2 + else if( lddifr Using a divide and conquer approach, DLASDA: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + !> B with diagonal D and offdiagonal E, where M = N + SQRE. The + !> algorithm computes the singular values in the SVD B = U * S * VT. + !> The orthogonal matrices U and VT are optionally computed in + !> compact form. + !> A related subroutine, DLASD0, computes the singular values and + !> the singular vectors in explicit form. + + pure subroutine stdlib_qlasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & + poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& + *) + real(qp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & + s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) + real(qp), intent(inout) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & + m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & + nwork2, smlszp, sqrei, vf, vfi, vl, vli + real(qp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldu<( n+sqre ) ) then + info = -8 + else if( ldgcol DLASDQ: computes the singular value decomposition (SVD) of a real + !> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + !> E, accumulating the transformations if desired. Letting B denote + !> the input bidiagonal matrix, the algorithm computes orthogonal + !> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose + !> of P). The singular values S are overwritten on D. + !> The input matrix U is changed to U * Q if desired. + !> The input matrix VT is changed to P**T * VT if desired. + !> The input matrix C is changed to Q**T * C if desired. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3, for a detailed description of the algorithm. + + pure subroutine stdlib_qlasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre + ! Array Arguments + real(qp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: rotate + integer(ilp) :: i, isub, iuplo, j, np1, sqre1 + real(qp) :: cs, r, smin, sn + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( iuplo==0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncvt<0 ) then + info = -4 + else if( nru<0 ) then + info = -5 + else if( ncc<0 ) then + info = -6 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + np1 = n + 1 + sqre1 = sqre + ! if matrix non-square upper bidiagonal, rotate to be lower + ! bidiagonal. the rotations are on the right. + if( ( iuplo==1 ) .and. ( sqre1==1 ) ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + call stdlib_qlartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + e( n ) = zero + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + iuplo = 2 + sqre1 = 0 + ! update singular vectors if desired. + if( ncvt>0 )call stdlib_qlasr( 'L', 'V', 'F', np1, ncvt, work( 1 ),work( np1 ), vt, & + ldvt ) + end if + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left. + if( iuplo==2 ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + ! if matrix (n+1)-by-n lower bidiagonal, one additional + ! rotation is needed. + if( sqre1==1 ) then + call stdlib_qlartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + end if + ! update singular vectors if desired. + if( nru>0 ) then + if( sqre1==0 ) then + call stdlib_qlasr( 'R', 'V', 'F', nru, n, work( 1 ),work( np1 ), u, ldu ) + + else + call stdlib_qlasr( 'R', 'V', 'F', nru, np1, work( 1 ),work( np1 ), u, ldu ) + + end if + end if + if( ncc>0 ) then + if( sqre1==0 ) then + call stdlib_qlasr( 'L', 'V', 'F', n, ncc, work( 1 ),work( np1 ), c, ldc ) + + else + call stdlib_qlasr( 'L', 'V', 'F', np1, ncc, work( 1 ),work( np1 ), c, ldc ) + + end if + end if + end if + ! call stdlib_qbdsqr to compute the svd of the reduced real + ! n-by-n upper bidiagonal matrix. + call stdlib_qbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) + + ! sort the singular values into ascending order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n + ! scan for smallest d(i). + isub = i + smin = d( i ) + do j = i + 1, n + if( d( j )0 )call stdlib_qswap( ncvt, vt( isub, 1 ), ldvt, vt( i, 1 ), ldvt ) + + if( nru>0 )call stdlib_qswap( nru, u( 1, isub ), 1, u( 1, i ), 1 ) + if( ncc>0 )call stdlib_qswap( ncc, c( isub, 1 ), ldc, c( i, 1 ), ldc ) + end if + end do + return + end subroutine stdlib_qlasdq + + !> DLASDT: creates a tree of subproblems for bidiagonal divide and + !> conquer. + + pure subroutine stdlib_qlasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: lvl, nd + integer(ilp), intent(in) :: msub, n + ! Array Arguments + integer(ilp), intent(out) :: inode(*), ndiml(*), ndimr(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, il, ir, llst, maxn, ncrnt, nlvl + real(qp) :: temp + ! Intrinsic Functions + intrinsic :: real,int,log,max + ! Executable Statements + ! find the number of levels on the tree. + maxn = max( 1, n ) + temp = log( real( maxn,KIND=qp) / real( msub+1,KIND=qp) ) / log( two ) + lvl = int( temp,KIND=ilp) + 1 + i = n / 2 + inode( 1 ) = i + 1 + ndiml( 1 ) = i + ndimr( 1 ) = n - i - 1 + il = 0 + ir = 1 + llst = 1 + do nlvl = 1, lvl - 1 + ! constructing the tree at (nlvl+1)-st level. the number of + ! nodes created on this level is llst * 2. + do i = 0, llst - 1 + il = il + 2 + ir = ir + 2 + ncrnt = llst + i + ndiml( il ) = ndiml( ncrnt ) / 2 + ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1 + inode( il ) = inode( ncrnt ) - ndimr( il ) - 1 + ndiml( ir ) = ndimr( ncrnt ) / 2 + ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1 + inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1 + end do + llst = llst*2 + end do + nd = llst*2 - 1 + return + end subroutine stdlib_qlasdt + + !> DLASET: initializes an m-by-n matrix A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_qlaset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(in) :: alpha, beta + ! Array Arguments + real(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the strictly upper triangular or trapezoidal part of the + ! array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the strictly lower triangular or trapezoidal part of the + ! array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + else + ! set the leading m-by-n submatrix to alpha. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + end if + ! set the first min(m,n) diagonal elements to beta. + do i = 1, min( m, n ) + a( i, i ) = beta + end do + return + end subroutine stdlib_qlaset + + !> DLASQ1: computes the singular values of a real N-by-N bidiagonal + !> matrix with diagonal D and off-diagonal E. The singular values + !> are computed to high relative accuracy, in the absence of + !> denormalization, underflow and overflow. The algorithm was first + !> presented in + !> "Accurate singular values and differential qd algorithms" by K. V. + !> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + !> 1994, + !> and the present implementation is described in "An implementation of + !> the dqds Algorithm (Positive Case)", LAPACK Working Note. + + pure subroutine stdlib_qlasq1( n, d, e, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo + real(qp) :: eps, scale, safmin, sigmn, sigmx + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLASQ1', -info ) + return + else if( n==0 ) then + return + else if( n==1 ) then + d( 1 ) = abs( d( 1 ) ) + return + else if( n==2 ) then + call stdlib_qlas2( d( 1 ), e( 1 ), d( 2 ), sigmn, sigmx ) + d( 1 ) = sigmx + d( 2 ) = sigmn + return + end if + ! estimate the largest singular value. + sigmx = zero + do i = 1, n - 1 + d( i ) = abs( d( i ) ) + sigmx = max( sigmx, abs( e( i ) ) ) + end do + d( n ) = abs( d( n ) ) + ! early return if sigmx is zero (matrix is already diagonal). + if( sigmx==zero ) then + call stdlib_qlasrt( 'D', n, d, iinfo ) + return + end if + do i = 1, n + sigmx = max( sigmx, d( i ) ) + end do + ! copy d and e into work (in the z format) and scale (squaring the + ! input data makes scaling by a power of the radix pointless). + eps = stdlib_qlamch( 'PRECISION' ) + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + scale = sqrt( eps / safmin ) + call stdlib_qcopy( n, d, 1, work( 1 ), 2 ) + call stdlib_qcopy( n-1, e, 1, work( 2 ), 2 ) + call stdlib_qlascl( 'G', 0, 0, sigmx, scale, 2*n-1, 1, work, 2*n-1,iinfo ) + ! compute the q's and e's. + do i = 1, 2*n - 1 + work( i ) = work( i )**2 + end do + work( 2*n ) = zero + call stdlib_qlasq2( n, work, info ) + if( info==0 ) then + do i = 1, n + d( i ) = sqrt( work( i ) ) + end do + call stdlib_qlascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + else if( info==2 ) then + ! maximum number of iterations exceeded. move data from work + ! into d and e so the calling subroutine can try to finish + do i = 1, n + d( i ) = sqrt( work( 2*i-1 ) ) + e( i ) = sqrt( work( 2*i ) ) + end do + call stdlib_qlascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + call stdlib_qlascl( 'G', 0, 0, scale, sigmx, n, 1, e, n, iinfo ) + end if + return + end subroutine stdlib_qlasq1 + + !> DLASQ2: computes all the eigenvalues of the symmetric positive + !> definite tridiagonal matrix associated with the qd array Z to high + !> relative accuracy are computed to high relative accuracy, in the + !> absence of denormalization, underflow and overflow. + !> To see the relation of Z to the tridiagonal matrix, let L be a + !> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + !> let U be an upper bidiagonal matrix with 1's above and diagonal + !> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + !> symmetric tridiagonal to which it is similar. + !> Note : DLASQ2 defines a logical variable, IEEE, which is true + !> on machines which follow ieee-754 floating-point standard in their + !> handling of infinities and NaNs, and false otherwise. This variable + !> is passed to DLASQ3. + + pure subroutine stdlib_qlasq2( n, z, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: cbias = 1.50_qp + real(qp), parameter :: hundrd = 100.0_qp + + + ! Local Scalars + logical(lk) :: ieee + integer(ilp) :: i0, i1, i4, iinfo, ipn4, iter, iwhila, iwhilb, k, kmin, n0, n1, nbig, & + ndiv, nfail, pp, splt, ttype + real(qp) :: d, dee, deemin, desig, dmin, dmin1, dmin2, dn, dn1, dn2, e, emax, emin, & + eps, g, oldemn, qmax, qmin, s, safmin, sigma, t, tau, temp, tol, tol2, trace, zmax, & + tempe, tempq + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + ! test the input arguments. + ! (in case stdlib_qlasq2 is not called by stdlib_qlasq1) + info = 0 + eps = stdlib_qlamch( 'PRECISION' ) + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + tol = eps*hundrd + tol2 = tol**2 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DLASQ2', 1 ) + return + else if( n==0 ) then + return + else if( n==1 ) then + ! 1-by-1 case. + if( z( 1 )z( 1 ) ) then + d = z( 3 ) + z( 3 ) = z( 1 ) + z( 1 ) = d + end if + z( 5 ) = z( 1 ) + z( 2 ) + z( 3 ) + if( z( 2 )>z( 3 )*tol2 ) then + t = half*( ( z( 1 )-z( 3 ) )+z( 2 ) ) + s = z( 3 )*( z( 2 ) / t ) + if( s<=t ) then + s = z( 3 )*( z( 2 ) / ( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( 3 )*( z( 2 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( 1 ) + ( s+z( 2 ) ) + z( 3 ) = z( 3 )*( z( 1 ) / t ) + z( 1 ) = t + end if + z( 2 ) = z( 3 ) + z( 6 ) = z( 2 ) + z( 1 ) + return + end if + ! check for negative data and compute sums of q's and e's. + z( 2*n ) = zero + emin = z( 2 ) + qmax = zero + zmax = zero + d = zero + e = zero + do k = 1, 2*( n-1 ), 2 + if( z( k )i0 ) then + emin = abs( z( 4*n0-5 ) ) + else + emin = zero + end if + qmin = z( 4*n0-3 ) + qmax = qmin + do i4 = 4*n0, 8, -4 + if( z( i4-5 )<=zero )go to 100 + if( qmin>=four*emax ) then + qmin = min( qmin, z( i4-3 ) ) + emax = max( emax, z( i4-5 ) ) + end if + qmax = max( qmax, z( i4-7 )+z( i4-5 ) ) + emin = min( emin, z( i4-5 ) ) + end do + i4 = 4 + 100 continue + i0 = i4 / 4 + pp = 0 + if( n0-i0>1 ) then + dee = z( 4*i0-3 ) + deemin = dee + kmin = i0 + do i4 = 4*i0+1, 4*n0-3, 4 + dee = z( i4 )*( dee /( dee+z( i4-2 ) ) ) + if( dee<=deemin ) then + deemin = dee + kmin = ( i4+3 )/4 + end if + end do + if( (kmin-i0)*2n0 )go to 150 + ! while submatrix unfinished take a good dqds step. + call stdlib_qlasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + pp = 1 - pp + ! when emin is very small check for splits. + if( pp==0 .and. n0-i0>=3 ) then + if( z( 4*n0 )<=tol2*qmax .or.z( 4*n0-1 )<=tol2*sigma ) then + splt = i0 - 1 + qmax = z( 4*i0-3 ) + emin = z( 4*i0-1 ) + oldemn = z( 4*i0 ) + do i4 = 4*i0, 4*( n0-3 ), 4 + if( z( i4 )<=tol2*z( i4-3 ) .or.z( i4-1 )<=tol2*sigma ) then + z( i4-1 ) = -sigma + splt = i4 / 4 + qmax = zero + emin = z( i4+3 ) + oldemn = z( i4+4 ) + else + qmax = max( qmax, z( i4+1 ) ) + emin = min( emin, z( i4-1 ) ) + oldemn = min( oldemn, z( i4 ) ) + end if + end do + z( 4*n0-1 ) = emin + z( 4*n0 ) = oldemn + i0 = splt + 1 + end if + end if + end do loop_140 + info = 2 + ! maximum number of iterations exceeded, restore the shift + ! sigma and place the new d's and e's in a qd array. + ! this might need to be done for several blocks + i1 = i0 + n1 = n0 + 145 continue + tempq = z( 4*i0-3 ) + z( 4*i0-3 ) = z( 4*i0-3 ) + sigma + do k = i0+1, n0 + tempe = z( 4*k-5 ) + z( 4*k-5 ) = z( 4*k-5 ) * (tempq / z( 4*k-7 )) + tempq = z( 4*k-3 ) + z( 4*k-3 ) = z( 4*k-3 ) + sigma + tempe - z( 4*k-5 ) + end do + ! prepare to do this on the previous block if there is one + if( i1>1 ) then + n1 = i1-1 + do while( ( i1>=2 ) .and. ( z(4*i1-5)>=zero ) ) + i1 = i1 - 1 + end do + sigma = -z(4*n1-1) + go to 145 + end if + do k = 1, n + z( 2*k-1 ) = z( 4*k-3 ) + ! only the block 1..n0 is unfinished. the rest of the e's + ! must be essentially zero, although sometimes other data + ! has been stored in them. + if( k DLASQ3: checks for deflation, computes a shift (TAU) and calls dqds. + !> In case of failure it changes shifts, and tries again until output + !> is positive. + + pure subroutine stdlib_qlasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0 + integer(ilp), intent(inout) :: iter, n0, ndiv, nfail, pp + real(qp), intent(inout) :: desig, dmin1, dmin2, dn, dn1, dn2, g, qmax, tau + real(qp), intent(out) :: dmin, sigma + ! Array Arguments + real(qp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: cbias = 1.50_qp + real(qp), parameter :: qurtr = 0.250_qp + real(qp), parameter :: hundrd = 100.0_qp + + + ! Local Scalars + integer(ilp) :: ipn4, j4, n0in, nn + integer(ilp), intent(inout) :: ttype + real(qp) :: eps, s, t, temp, tol, tol2 + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + n0in = n0 + eps = stdlib_qlamch( 'PRECISION' ) + tol = eps*hundrd + tol2 = tol**2 + ! check for deflation. + 10 continue + if( n0tol2*( sigma+z( nn-3 ) ) .and.z( nn-2*pp-4 )>tol2*z( nn-7 ) )go to & + 30 + 20 continue + z( 4*n0-3 ) = z( 4*n0+pp-3 ) + sigma + n0 = n0 - 1 + go to 10 + ! check whether e(n0-2) is negligible, 2 eigenvalues. + 30 continue + if( z( nn-9 )>tol2*sigma .and.z( nn-2*pp-8 )>tol2*z( nn-11 ) )go to 50 + 40 continue + if( z( nn-3 )>z( nn-7 ) ) then + s = z( nn-3 ) + z( nn-3 ) = z( nn-7 ) + z( nn-7 ) = s + end if + t = half*( ( z( nn-7 )-z( nn-3 ) )+z( nn-5 ) ) + if( z( nn-5 )>z( nn-3 )*tol2.and.t/=zero ) then + s = z( nn-3 )*( z( nn-5 ) / t ) + if( s<=t ) then + s = z( nn-3 )*( z( nn-5 ) /( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( nn-3 )*( z( nn-5 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( nn-7 ) + ( s+z( nn-5 ) ) + z( nn-3 ) = z( nn-3 )*( z( nn-7 ) / t ) + z( nn-7 ) = t + end if + z( 4*n0-7 ) = z( nn-7 ) + sigma + z( 4*n0-3 ) = z( nn-3 ) + sigma + n0 = n0 - 2 + go to 10 + 50 continue + if( pp==2 )pp = 0 + ! reverse the qd-array, if warranted. + if( dmin<=zero .or. n0 0. + 70 continue + call stdlib_qlasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2, dn,dn1, dn2, ieee, & + eps ) + ndiv = ndiv + ( n0-i0+2 ) + iter = iter + 1 + ! check status. + if( dmin>=zero .and. dmin1>=zero ) then + ! success. + go to 90 + else if( dminzero .and.z( 4*( n0-1 )-pp )

DSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> DSTEGR is a compatibility wrapper around the improved DSTEMR routine. + !> See DSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : DSTEGR and DSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_qstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*) + real(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_qstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_qstegr + + !> DSTEIN: computes the eigenvectors of a real symmetric tridiagonal + !> matrix T corresponding to specified eigenvalues, using inverse + !> iteration. + !> The maximum number of iterations allowed for each eigenvector is + !> specified by an internal parameter MAXITS (currently set to 5). + + pure subroutine stdlib_qstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, m, n + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), isplit(*) + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(in) :: d(*), e(*), w(*) + real(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: odm3 = 1.0e-3_qp + real(qp), parameter :: odm1 = 1.0e-1_qp + integer(ilp), parameter :: maxits = 5 + integer(ilp), parameter :: extra = 2 + + + ! Local Scalars + integer(ilp) :: b1, blksiz, bn, gpind, i, iinfo, indrv1, indrv2, indrv3, indrv4, & + indrv5, its, j, j1, jblk, jmax, nblk, nrmchk + real(qp) :: dtpcrt, eps, eps1, nrm, onenrm, ortol, pertol, scl, sep, tol, xj, xjm, & + ztr + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + do i = 1, m + ifail( i ) = 0 + end do + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -4 + else if( ldz1 ) then + eps1 = abs( eps*xj ) + pertol = ten*eps1 + sep = xj - xjm + if( sepmaxits )go to 100 + ! normalize and scale the righthand side vector pb. + jmax = stdlib_iqamax( blksiz, work( indrv1+1 ), 1 ) + scl = blksiz*onenrm*max( eps,abs( work( indrv4+blksiz ) ) ) /abs( work( indrv1+& + jmax ) ) + call stdlib_qscal( blksiz, scl, work( indrv1+1 ), 1 ) + ! solve the system lu = pb. + call stdlib_qlagts( -1, blksiz, work( indrv4+1 ), work( indrv2+2 ),work( indrv3+& + 1 ), work( indrv5+1 ), iwork,work( indrv1+1 ), tol, iinfo ) + ! reorthogonalize by modified gram-schmidt if eigenvalues are + ! close enough. + if( jblk==1 )go to 90 + if( abs( xj-xjm )>ortol )gpind = j + if( gpind/=j ) then + do i = gpind, j - 1 + ztr = -stdlib_qdot( blksiz, work( indrv1+1 ), 1, z( b1, i ),1 ) + call stdlib_qaxpy( blksiz, ztr, z( b1, i ), 1,work( indrv1+1 ), 1 ) + end do + end if + ! check the infinity norm of the iterate. + 90 continue + jmax = stdlib_iqamax( blksiz, work( indrv1+1 ), 1 ) + nrm = abs( work( indrv1+jmax ) ) + ! continue for additional iterations after norm reaches + ! stopping criterion. + if( nrm DSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.DSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + + pure subroutine stdlib_qstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*) + real(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: minrgp = 1.0e-3_qp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(qp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_qstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_qlarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_qlarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_qlarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_qlae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_qlaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_qlarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_qlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_qscal( n, scale, d, 1 ) + call stdlib_qscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_qlarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_qlarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_qcopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_qlarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_qlarre computes the eigenvalues to less than full precision. + ! stdlib_qlarrv will refine the eigenvalue approximations, and we can + ! need less accurate initial bisection in stdlib_qlarre. + ! note: these settings do only affect the subset case and stdlib_qlarre + rtol1 = sqrt(eps) + rtol2 = max( sqrt(eps)*5.0e-3_qp, four * eps ) + endif + call stdlib_qlarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_qlarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_qlarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_qlarre computes eigenvalues of the (shifted) root representation + ! stdlib_qlarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_qlarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_qlasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) DSTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the implicit QL or QR method. + !> The eigenvectors of a full or band symmetric matrix can also be found + !> if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to + !> tridiagonal form. + + pure subroutine stdlib_qsteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*), z(ldz,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + ! Local Scalars + integer(ilp) :: i, icompz, ii, iscale, j, jtot, k, l, l1, lend, lendm1, lendp1, lendsv,& + lm1, lsv, m, mm, mm1, nm1, nmaxit + real(qp) :: anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, s, safmax, safmin, ssfmax, & + ssfmin, tst + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldzn )go to 160 + if( l1>1 )e( l1-1 ) = zero + if( l1<=nm1 ) then + do m = l1, nm1 + tst = abs( e( m ) ) + if( tst==zero )go to 30 + if( tst<=( sqrt( abs( d( m ) ) )*sqrt( abs( d( m+1 ) ) ) )*eps ) then + e( m ) = zero + go to 30 + end if + end do + end if + m = n + 30 continue + l = l1 + lsv = l + lend = m + lendsv = lend + l1 = m + 1 + if( lend==l )go to 10 + ! scale submatrix in rows and columns l to lend + anorm = stdlib_qlanst( 'M', lend-l+1, d( l ), e( l ) ) + iscale = 0 + if( anorm==zero )go to 10 + if( anorm>ssfmax ) then + iscale = 1 + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l+1, 1, d( l ), n,info ) + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l, 1, e( l ), n,info ) + else if( anorml ) then + ! ql iteration + ! look for small subdiagonal element. + 40 continue + if( l/=lend ) then + lendm1 = lend - 1 + do m = l, lendm1 + tst = abs( e( m ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m+1 ) )+safmin )go to 60 + end do + end if + m = lend + 60 continue + if( m0 ) then + call stdlib_qlaev2( d( l ), e( l ), d( l+1 ), rt1, rt2, c, s ) + work( l ) = c + work( n-1+l ) = s + call stdlib_qlasr( 'R', 'V', 'B', n, 2, work( l ),work( n-1+l ), z( 1, l ), & + ldz ) + else + call stdlib_qlae2( d( l ), e( l ), d( l+1 ), rt1, rt2 ) + end if + d( l ) = rt1 + d( l+1 ) = rt2 + e( l ) = zero + l = l + 2 + if( l<=lend )go to 40 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l+1 )-p ) / ( two*e( l ) ) + r = stdlib_qlapy2( g, one ) + g = d( m ) - p + ( e( l ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + mm1 = m - 1 + do i = mm1, l, -1 + f = s*e( i ) + b = c*e( i ) + call stdlib_qlartg( g, f, c, s, r ) + if( i/=m-1 )e( i+1 ) = r + g = d( i+1 ) - p + r = ( d( i )-g )*s + two*c*b + p = s*r + d( i+1 ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = -s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = m - l + 1 + call stdlib_qlasr( 'R', 'V', 'B', n, mm, work( l ), work( n-1+l ),z( 1, l ), ldz & + ) + end if + d( l ) = d( l ) - p + e( l ) = g + go to 40 + ! eigenvalue found. + 80 continue + d( l ) = p + l = l + 1 + if( l<=lend )go to 40 + go to 140 + else + ! qr iteration + ! look for small superdiagonal element. + 90 continue + if( l/=lend ) then + lendp1 = lend + 1 + do m = l, lendp1, -1 + tst = abs( e( m-1 ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m-1 ) )+safmin )go to 110 + end do + end if + m = lend + 110 continue + if( m>lend )e( m-1 ) = zero + p = d( l ) + if( m==l )go to 130 + ! if remaining matrix is 2-by-2, use stdlib_qlae2 or stdlib_dlaev2 + ! to compute its eigensystem. + if( m==l-1 ) then + if( icompz>0 ) then + call stdlib_qlaev2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2, c, s ) + work( m ) = c + work( n-1+m ) = s + call stdlib_qlasr( 'R', 'V', 'F', n, 2, work( m ),work( n-1+m ), z( 1, l-1 ), & + ldz ) + else + call stdlib_qlae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) + end if + d( l-1 ) = rt1 + d( l ) = rt2 + e( l-1 ) = zero + l = l - 2 + if( l>=lend )go to 90 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l-1 )-p ) / ( two*e( l-1 ) ) + r = stdlib_qlapy2( g, one ) + g = d( m ) - p + ( e( l-1 ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + lm1 = l - 1 + do i = m, lm1 + f = s*e( i ) + b = c*e( i ) + call stdlib_qlartg( g, f, c, s, r ) + if( i/=m )e( i-1 ) = r + g = d( i ) - p + r = ( d( i+1 )-g )*s + two*c*b + p = s*r + d( i ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = l - m + 1 + call stdlib_qlasr( 'R', 'V', 'F', n, mm, work( m ), work( n-1+m ),z( 1, m ), ldz & + ) + end if + d( l ) = d( l ) - p + e( lm1 ) = g + go to 90 + ! eigenvalue found. + 130 continue + d( l ) = p + l = l - 1 + if( l>=lend )go to 90 + go to 140 + end if + ! undo scaling if necessary + 140 continue + if( iscale==1 ) then + call stdlib_qlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_qlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + else if( iscale==2 ) then + call stdlib_qlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_qlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + end if + ! check for no convergence to an eigenvalue after a total + ! of n*maxit iterations. + if( jtot DSTERF: computes all eigenvalues of a symmetric tridiagonal matrix + !> using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + pure subroutine stdlib_qsterf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + ! Local Scalars + integer(ilp) :: i, iscale, jtot, l, l1, lend, lendsv, lsv, m, nmaxit + real(qp) :: alpha, anorm, bb, c, eps, eps2, gamma, oldc, oldgam, p, r, rt1, rt2, rte, & + s, safmax, safmin, sigma, ssfmax, ssfmin, rmax + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + ! quick return if possible + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'DSTERF', -info ) + return + end if + if( n<=1 )return + ! determine the unit roundoff for this environment. + eps = stdlib_qlamch( 'E' ) + eps2 = eps**2 + safmin = stdlib_qlamch( 'S' ) + safmax = one / safmin + ssfmax = sqrt( safmax ) / three + ssfmin = sqrt( safmin ) / eps2 + rmax = stdlib_qlamch( 'O' ) + ! compute the eigenvalues of the tridiagonal matrix. + nmaxit = n*maxit + sigma = zero + jtot = 0 + ! determine where the matrix splits and choose ql or qr iteration + ! for each block, according to whether top or bottom diagonal + ! element is smaller. + l1 = 1 + 10 continue + if( l1>n )go to 170 + if( l1>1 )e( l1-1 ) = zero + do m = l1, n - 1 + if( abs( e( m ) )<=( sqrt( abs( d( m ) ) )*sqrt( abs( d( m+1 ) ) ) )*eps ) & + then + e( m ) = zero + go to 30 + end if + end do + m = n + 30 continue + l = l1 + lsv = l + lend = m + lendsv = lend + l1 = m + 1 + if( lend==l )go to 10 + ! scale submatrix in rows and columns l to lend + anorm = stdlib_qlanst( 'M', lend-l+1, d( l ), e( l ) ) + iscale = 0 + if( anorm==zero )go to 10 + if( (anorm>ssfmax) ) then + iscale = 1 + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l+1, 1, d( l ), n,info ) + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l, 1, e( l ), n,info ) + else if( anorm=l ) then + ! ql iteration + ! look for small subdiagonal element. + 50 continue + if( l/=lend ) then + do m = l, lend - 1 + if( abs( e( m ) )<=eps2*abs( d( m )*d( m+1 ) ) )go to 70 + end do + end if + m = lend + 70 continue + if( mlend )e( m-1 ) = zero + p = d( l ) + if( m==l )go to 140 + ! if remaining matrix is 2 by 2, use stdlib_qlae2 to compute its + ! eigenvalues. + if( m==l-1 ) then + rte = sqrt( e( l-1 ) ) + call stdlib_qlae2( d( l ), rte, d( l-1 ), rt1, rt2 ) + d( l ) = rt1 + d( l-1 ) = rt2 + e( l-1 ) = zero + l = l - 2 + if( l>=lend )go to 100 + go to 150 + end if + if( jtot==nmaxit )go to 150 + jtot = jtot + 1 + ! form shift. + rte = sqrt( e( l-1 ) ) + sigma = ( d( l-1 )-p ) / ( two*rte ) + r = stdlib_qlapy2( sigma, one ) + sigma = p - ( rte / ( sigma+sign( r, sigma ) ) ) + c = one + s = zero + gamma = d( m ) - sigma + p = gamma*gamma + ! inner loop + do i = m, l - 1 + bb = e( i ) + r = p + bb + if( i/=m )e( i-1 ) = s*r + oldc = c + c = p / r + s = bb / r + oldgam = gamma + alpha = d( i+1 ) + gamma = c*( alpha-sigma ) - s*oldgam + d( i ) = oldgam + ( alpha-gamma ) + if( c/=zero ) then + p = ( gamma*gamma ) / c + else + p = oldc*bb + end if + end do + e( l-1 ) = s*p + d( l ) = sigma + gamma + go to 100 + ! eigenvalue found. + 140 continue + d( l ) = p + l = l - 1 + if( l>=lend )go to 100 + go to 150 + end if + ! undo scaling if necessary + 150 continue + if( iscale==1 )call stdlib_qlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv+1, 1,d( lsv ), & + n, info ) + if( iscale==2 )call stdlib_qlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv+1, 1,d( lsv ), & + n, info ) + ! check for no convergence to an eigenvalue after a total + ! of n*maxit iterations. + if( jtot DSTEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix A. + + pure subroutine stdlib_qstev( jobz, n, d, e, z, ldz, work, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: imax, iscale + real(qp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tnrm + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_qscal( n, sigma, d, 1 ) + call stdlib_qscal( n-1, sigma, e( 1 ), 1 ) + end if + ! for eigenvalues only, call stdlib_qsterf. for eigenvalues and + ! eigenvectors, call stdlib_qsteqr. + if( .not.wantz ) then + call stdlib_qsterf( n, d, e, info ) + else + call stdlib_qsteqr( 'I', n, d, e, z, ldz, work, info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, d, 1 ) + end if + return + end subroutine stdlib_qstev + + !> DSTEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_qstevd( jobz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iscale, liwmin, lwmin + real(qp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tnrm + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + liwmin = 1 + lwmin = 1 + if( n>1 .and. wantz ) then + lwmin = 1 + 4*n + n**2 + liwmin = 3 + 5*n + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_qscal( n, sigma, d, 1 ) + call stdlib_qscal( n-1, sigma, e( 1 ), 1 ) + end if + ! for eigenvalues only, call stdlib_qsterf. for eigenvalues and + ! eigenvectors, call stdlib_qstedc. + if( .not.wantz ) then + call stdlib_qsterf( n, d, e, info ) + else + call stdlib_qstedc( 'I', n, d, e, z, ldz, work, lwork, iwork, liwork,info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_qscal( n, one / sigma, d, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_qstevd + + !> DSTEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Eigenvalues and + !> eigenvectors can be selected by specifying either a range of values + !> or a range of indices for the desired eigenvalues. + !> Whenever possible, DSTEVR calls DSTEMR to compute the + !> eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. For the i-th + !> unreduced block of T, + !> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T + !> is a relatively robust representation, + !> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high + !> relative accuracy by the dqds algorithm, + !> (c) If there is a cluster of close eigenvalues, "choose" sigma_i + !> close to the cluster, and go to step (a), + !> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, + !> compute the corresponding eigenvector by forming a + !> rank-revealing twisted factorization. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, + !> Computer Science Division Technical Report No. UCB//CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + pure subroutine stdlib_qstevr( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, lquery, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, imax, indibl, indifl, indisp, indiwo, iscale, itmp1, j, jj, & + liwmin, lwmin, nsplit + real(qp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, tnrm, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'DSTEVR', 'N', 1, 2, 3, 4 ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 20*n ) + liwmin = max( 1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + if( valeig ) then + vll = vl + vuu = vu + end if + tnrm = stdlib_qlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_qscal( n, sigma, d, 1 ) + call stdlib_qscal( n-1, sigma, e( 1 ), 1 ) + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: these indices are used only + ! if stdlib_qsterf or stdlib_qstemr fail. + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_qstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_qstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_qstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indisp + n + ! if all eigenvalues are desired, then + ! call stdlib_qsterf or stdlib_qstemr. if this fails for some eigenvalue, then + ! try stdlib_qstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ieeeok==1 ) then + call stdlib_qcopy( n-1, e( 1 ), 1, work( 1 ), 1 ) + if( .not.wantz ) then + call stdlib_qcopy( n, d, 1, w, 1 ) + call stdlib_qsterf( n, w, work, info ) + else + call stdlib_qcopy( n, d, 1, work( n+1 ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_qstemr( jobz, 'A', n, work( n+1 ), work, vl, vu, il,iu, m, w, z, ldz,& + n, isuppz, tryrac,work( 2*n+1 ), lwork-2*n, iwork, liwork, info ) + end if + if( info==0 ) then + m = n + go to 10 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_qstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstol, d, e, m,nsplit, w, & + iwork( indibl ), iwork( indisp ), work,iwork( indiwo ), info ) + if( wantz ) then + call stdlib_qstein( n, d, e, m, w, iwork( indibl ), iwork( indisp ),z, ldz, work, & + iwork( indiwo ), iwork( indifl ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 10 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSTEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix A. Eigenvalues and + !> eigenvectors can be selected by specifying either a range of values + !> or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_qstevx( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + work, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, imax, indibl, indisp, indiwo, indwrk, iscale, itmp1, j, jj, & + nsplit + real(qp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, tnrm, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + tnrm = stdlib_qlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_qscal( n, sigma, d, 1 ) + call stdlib_qscal( n-1, sigma, e( 1 ), 1 ) + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! if all eigenvalues are desired and abstol is less than zero, then + ! call stdlib_qsterf or stdlib_dsteqr. if this fails for some eigenvalue, then + ! try stdlib_qstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_qcopy( n, d, 1, w, 1 ) + call stdlib_qcopy( n-1, e( 1 ), 1, work( 1 ), 1 ) + indwrk = n + 1 + if( .not.wantz ) then + call stdlib_qsterf( n, w, work, info ) + else + call stdlib_qsteqr( 'I', n, w, work, z, ldz, work( indwrk ), info ) + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_dstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indwrk = 1 + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstol, d, e, m,nsplit, w, & + iwork( indibl ), iwork( indisp ),work( indwrk ), iwork( indiwo ), info ) + if( wantz ) then + call stdlib_qstein( n, d, e, m, w, iwork( indibl ), iwork( indisp ),z, ldz, work( & + indwrk ), iwork( indiwo ), ifail,info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a real symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by DSYTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_qsycon( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_qsytrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_qsycon + + !> DSYCON_ROOK: estimates the reciprocal of the condition number (in the + !> 1-norm) of a real symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by DSYTRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_qsycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_qsytrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_qsycon_rook + + !> DSYCONV: convert A given by TRF into L and D and vice-versa. + !> Get Non-diag elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_qsyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + real(qp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=zero + a(i-1,i)=zero + i=i-1 + else + e(i)=zero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=zero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_qsyconv + + !> If parameter WAY = 'C': + !> DSYCONVF: converts the factorization output format used in + !> DSYTRF provided on entry in parameter A into the factorization + !> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in DSYTRF into + !> the format used in DSYTRF_RK (or DSYTRF_BK). + !> If parameter WAY = 'R': + !> DSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in DSYTRF_RK + !> (or DSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in DSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in DSYTRF_RK + !> (or DSYTRF_BK) into the format used in DSYTRF. + + pure subroutine stdlib_qsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_qswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_qswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_qswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_qswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is one interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_qsyconvf + + !> If parameter WAY = 'C': + !> DSYCONVF_ROOK: converts the factorization output format used in + !> DSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for DSYTRF_ROOK and + !> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> DSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in DSYTRF_RK + !> (or DSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in DSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for DSYTRF_ROOK and + !> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. + + pure subroutine stdlib_qsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_qswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_qswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_qswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_qswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_qswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_qswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_qsyconvf_rook + + !> DSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_qsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(qp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'DSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_qp / s( j ) + end do + tol = one / sqrt( 2.0_qp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + s( i )*work( i ) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_qlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = abs( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( work( i ) - t*si ) + c0 = -(t*si)*si + 2*work( i )*si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + work( i ) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_qlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_qlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_qsyequb + + !> DSYEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. + + subroutine stdlib_qsyev( jobz, uplo, n, a, lda, w, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_qlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_qsytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_qsytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_qorgtr to generate the orthogonal matrix, then call stdlib_qsteqr. + if( .not.wantz ) then + call stdlib_qsterf( n, w, work( inde ), info ) + else + call stdlib_qorgtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + call stdlib_qsteqr( jobz, n, w, work( inde ), a, lda, work( indtau ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_qsyev + + !> DSYEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> Because of large use of BLAS of level 3, DSYEVD needs N**2 more + !> workspace than DSYEVX. + + subroutine stdlib_qsyevd( jobz, uplo, n, a, lda, w, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, inde, indtau, indwk2, indwrk, iscale, liopt, liwmin, llwork, & + llwrk2, lopt, lwmin + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_qlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_qsytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + indwk2 = indwrk + n*n + llwrk2 = lwork - indwk2 + 1 + call stdlib_qsytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_qstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_qormtr to multiply it by the + ! householder transformations stored in a. + if( .not.wantz ) then + call stdlib_qsterf( n, w, work( inde ), info ) + else + call stdlib_qstedc( 'I', n, w, work( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, iwork, liwork, info ) + call stdlib_qormtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_qlacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_qscal( n, one / sigma, w, 1 ) + work( 1 ) = lopt + iwork( 1 ) = liopt + return + end subroutine stdlib_qsyevd + + !> DSYEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> DSYEVR first reduces the matrix A to tridiagonal form T with a call + !> to DSYTRD. Then, whenever possible, DSYEVR calls DSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. DSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see DSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> DSYEVR calls DSTEBZ and DSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of DSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_qsyevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indd, inddd, inde, indee, indibl, indifl, & + indisp, indiwo, indtau, indwk, indwkn, iscale, j, jj, liwmin, llwork, llwrkn, lwkopt, & + lwmin, nb, nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 26*n ) + liwmin = max( 1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_qlansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_qscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_qscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_qsterf or stdlib_qstemr fail. + ! work(indtau:indtau+n-1) stores the scalar factors of the + ! elementary reflectors used in stdlib_qsytrd. + indtau = 1 + ! work(indd:indd+n-1) stores the tridiagonal's diagonal entries. + indd = indtau + n + ! work(inde:inde+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_qsytrd. + inde = indd + n + ! work(inddd:inddd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_qstemr (the stdlib_qsterf path copies the diagonal to w). + inddd = inde + n + ! work(indee:indee+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_qsterf and stdlib_qstemr. + indee = inddd + n + ! indwk is the starting offset of the left-over workspace, and + ! llwork is the remaining workspace size. + indwk = indee + n + llwork = lwork - indwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_qstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_qstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_qstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_qsytrd to reduce symmetric matrix to tridiagonal form. + call stdlib_qsytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_qsterf or stdlib_qstemr and stdlib_qormtr. + if( ( alleig .or. ( indeig .and. il==1 .and. iu==n ) ) .and.ieeeok==1 ) then + if( .not.wantz ) then + call stdlib_qcopy( n, work( indd ), 1, w, 1 ) + call stdlib_qcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_qsterf( n, w, work( indee ), info ) + else + call stdlib_qcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_qcopy( n, work( indd ), 1, work( inddd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_qstemr( jobz, 'A', n, work( inddd ), work( indee ),vl, vu, il, iu, m,& + w, z, ldz, n, isuppz,tryrac, work( indwk ), lwork, iwork, liwork,info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_qstemr. + if( wantz .and. info==0 ) then + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_qormtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + ! everything worked. skip stdlib_qstebz/stdlib_qstein. iwork(:) are + ! undefined. + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_qstein. + ! also call stdlib_qstebz and stdlib_qstein if stdlib_qstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwk ),iwork( indiwo ), info ) + + if( wantz ) then + call stdlib_qstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_qstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_qormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + ! jump here if stdlib_qstemr/stdlib_qstein succeeded. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. note: we do not sort the ifail portion of iwork. + ! it may not be initialized (if stdlib_qstemr/stdlib_qstein succeeded), and we do + ! not return this detailed information to the user. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of indices + !> for the desired eigenvalues. + + subroutine stdlib_qsyevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indtau, & + indwkn, indwrk, iscale, itmp1, j, jj, llwork, llwrkn, lwkmin, lwkopt, nb, & + nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_qlansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_qscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_qscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_qsytrd to reduce symmetric matrix to tridiagonal form. + indtau = 1 + inde = indtau + n + indd = inde + n + indwrk = indd + n + llwork = lwork - indwrk + 1 + call stdlib_qsytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_qsterf or stdlib_qorgtr and stdlib_dsteqr. if this fails for + ! some eigenvalue, then try stdlib_qstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_qcopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_qcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_qsterf( n, w, work( indee ), info ) + else + call stdlib_qlacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_qorgtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_qcopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_qsteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_dstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_qstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_qstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_qormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) DSYGS2: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. + !> B must have been previously factorized as U**T *U or L*L**T by DPOTRF. + + pure subroutine stdlib_qsygs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(qp) :: akk, bkk, ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DSYGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by DPOTRF. + + pure subroutine stdlib_qsygst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_qsygs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**t)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_qsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_qtrsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, & + one, b( k, k ), ldb,a( k, k+kb ), lda ) + call stdlib_qsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_qsyr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), & + lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda ) + call stdlib_qsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_qtrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**t) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_qsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_qtrsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, & + one, b( k, k ), ldb,a( k+kb, k ), lda ) + call stdlib_qsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_qsyr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k & + ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda ) + call stdlib_qsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_qtrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**t + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_qtrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, & + b, ldb, a( 1, k ), lda ) + call stdlib_qsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_qsyr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_qsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_qtrmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( & + k, k ), ldb, a( 1, k ),lda ) + call stdlib_qsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**t*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_qtrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, & + b, ldb, a( k, 1 ), lda ) + call stdlib_qsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_qsyr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1 ), lda, b( k, & + 1 ), ldb, one, a,lda ) + call stdlib_qsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_qtrmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( & + k, k ), ldb, a( k, 1 ), lda ) + call stdlib_qsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_qsygst + + !> DSYGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric and B is also + !> positive definite. + + subroutine stdlib_qsygv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_qtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_qtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_qsygv + + !> DSYGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_qsygvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lwmin + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 6*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + 1 + end if + lopt = lwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda DSYGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A + !> and B are assumed to be symmetric and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_qsygvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + upper = stdlib_lsame( uplo, 'U' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_qtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_qtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_qsygvx + + !> DSYRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_qsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + real(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_qsytrs( uplo, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + call stdlib_qaxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_qsytrs( uplo, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qsytrs( uplo, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_qsyrfs + + !> DSYSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_qsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYSV computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_qsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYSV_RK: computes the solution to a real system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> DSYTRF_RK is called to compute the factorization of a real + !> symmetric matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine DSYTRS_3. + + pure subroutine stdlib_qsysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYSV_ROOK: computes the solution to a real system of linear + !> equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> DSYTRF_ROOK is called to compute the factorization of a real + !> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling DSYTRS_ROOK. + + pure subroutine stdlib_qsysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda DSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a real system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_qsysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(inout) :: af(ldaf,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_qlansy( 'I', uplo, n, a, lda, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_qsycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, iwork,info ) + ! compute the solution vectors x. + call stdlib_qlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_qsytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_qsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond DSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_qsyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(qp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_qswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_qswap( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_qsyswapr + + !> DSYTD2: reduces a real symmetric matrix A to symmetric tridiagonal + !> form T by an orthogonal similarity transformation: Q**T * A * Q = T. + + pure subroutine stdlib_qsytd2( uplo, n, a, lda, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*), e(*), tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(qp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DSYTF2: computes the factorization of a real symmetric matrix A using + !> the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_qsytf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(qp) :: absakk, alpha, colmax, d11, d12, d21, d22, r1, rowmax, t, wk, wkm1, & + wkp1 + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iqamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) .or. stdlib_qisnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = imax + stdlib_iqamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = abs( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_iqamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, abs( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_qswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + call stdlib_qswap( kk-kp-1, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = one / a( k, k ) + call stdlib_qsyr( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_qscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*a( j, k-1 )-a( j, k ) ) + wk = d12*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*wk -a( i, k-1 )*wkm1 + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_iqamax( imax-k, a( imax, k ), lda ) + rowmax = abs( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp DSYTF2_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_qsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(qp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iqamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = zero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_iqamax( imax-1, a( 1, imax ), 1 ) + dtemp = abs( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_qswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_qswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_qswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_qswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_qsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_qscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_qsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = zero + a( k-1, k ) = zero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = zero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_qswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_qswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_qswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_qswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_qsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_qscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_qsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k DSYTF2_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_qsytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(qp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iqamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_iqamax( imax-1, a( 1, imax ), 1 ) + dtemp = abs( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_qswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_qswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_qswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_qswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_qsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_qscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_qsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_qswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_qswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_qsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_qscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_qsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k DSYTRD: reduces a real symmetric matrix A to real symmetric + !> tridiagonal form T by an orthogonal similarity transformation: + !> Q**T * A * Q = T. + + pure subroutine stdlib_qsytrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: d(*), e(*), tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, iws, j, kk, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb DSYTRD_SB2ST: reduces a real symmetric band matrix A to real symmetric + !> tridiagonal form T by a orthogonal similarity transformation: + !> Q**T * A * Q = T. + + pure subroutine stdlib_qsytrd_sb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, lhous, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: stage1, uplo, vect + integer(ilp), intent(in) :: n, kd, ldab, lhous, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + real(qp), intent(inout) :: ab(ldab,*) + real(qp), intent(out) :: hous(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: rzero = 0.0e+0_qp + + + ! Local Scalars + logical(lk) :: lquery, wantq, upper, afters1 + integer(ilp) :: i, m, k, ib, sweepid, myid, shift, stt, st, ed, stind, edind, & + blklastind, colpt, thed, stepercol, grsiz, thgrsiz, thgrnb, thgrid, nbtiles, ttype, & + tid, nthreads, debug, abdpos, abofdpos, dpos, ofdpos, awpos, inda, indw, apos, sizea, & + lda, indv, indtau, sidev, sizetau, ldv, lhmin, lwmin + ! Intrinsic Functions + intrinsic :: min,max,ceiling,real + ! Executable Statements + ! determine the minimal workspace size required. + ! test the input parameters + debug = 0 + info = 0 + afters1 = stdlib_lsame( stage1, 'Y' ) + wantq = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) .or. ( lhous==-1 ) + ! determine the block size, the workspace size and the hous size. + ib = stdlib_ilaenv2stage( 2, 'DSYTRD_SB2ST', vect, n, kd, -1, -1 ) + lhmin = stdlib_ilaenv2stage( 3, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 ) + lwmin = stdlib_ilaenv2stage( 4, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 ) + if( .not.afters1 .and. .not.stdlib_lsame( stage1, 'N' ) ) then + info = -1 + else if( .not.stdlib_lsame( vect, 'N' ) ) then + info = -2 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab<(kd+1) ) then + info = -7 + else if( lhoused ) exit + loop_120: do m = 1, stepercol + st = stt + loop_130: do sweepid = st, ed + loop_140: do k = 1, grsiz + myid = (i-sweepid)*(stepercol*grsiz)+ (m-1)*grsiz + k + if ( myid==1 ) then + ttype = 1 + else + ttype = mod( myid, 2 ) + 2 + endif + if( ttype==2 ) then + colpt = (myid/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + blklastind = colpt + else + colpt = ((myid+1)/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + if( ( stind>=edind-1 ).and.( edind==n ) ) then + blklastind = n + else + blklastind = 0 + endif + endif + ! call the kernel + !$ if( ttype/=1 ) then + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(in:WORK(MYID-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + !$ call stdlib_qsb2st_kernels( uplo, wantq, ttype,stind, edind, & + !$ sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + !$ indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ else + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + call stdlib_qsb2st_kernels( uplo, wantq, ttype,stind, edind, & + sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ endif + if ( blklastind>=(n-1) ) then + stt = stt + 1 + exit + endif + end do loop_140 + end do loop_130 + end do loop_120 + end do loop_110 + end do loop_100 + !$OMP END MASTER + !$OMP END PARALLEL + ! copy the diagonal from a to d. note that d is real thus only + ! the real part is needed, the imaginary part should be zero. + do i = 1, n + d( i ) = ( work( dpos+(i-1)*lda ) ) + end do + ! copy the off diagonal from a to e. note that e is real thus only + ! the real part is needed, the imaginary part should be zero. + if( upper ) then + do i = 1, n-1 + e( i ) = ( work( ofdpos+i*lda ) ) + end do + else + do i = 1, n-1 + e( i ) = ( work( ofdpos+(i-1)*lda ) ) + end do + endif + hous( 1 ) = lhmin + work( 1 ) = lwmin + return + end subroutine stdlib_qsytrd_sb2st + + !> DSYTRD_SY2SB: reduces a real symmetric matrix A to real symmetric + !> band-diagonal form AB by a orthogonal similarity transformation: + !> Q**T * A * Q = AB. + + pure subroutine stdlib_qsytrd_sy2sb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: rone = 1.0e+0_qp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'DSYTRD_SY2SB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda DSYTRF: computes the factorization of a real symmetric matrix A using + !> the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U**T*D*U or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_qsytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_qlasyf( uplo, k, nb, kb, a, lda, ipiv, work, ldwork,iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_qsytf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_qlasyf; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_qlasyf( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, ldwork, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_qsytf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_qsytrf + + !> DSYTRF_AA: computes the factorization of a real symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_qsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + real(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'DSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_qlasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_qswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = one + call stdlib_qcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_qscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_qgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_qgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j1-k2, j3 ), 1,one, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_qgemm + call stdlib_qgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-one, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,one, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_qcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_qcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_qlasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_qlasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_qswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = one + call stdlib_qcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_qscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_qgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_qgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j3, j1-k2 ), lda,one, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_qgemm + call stdlib_qgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-one, work(& + j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,one, a( j3, j2 ), lda ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_qcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_qsytrf_aa + + !> DSYTRF_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_qsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_qlasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_qsytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_qlasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_qsytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_qswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_qsytrf_rk + + !> DSYTRF_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_qsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_qlasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_qsytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_qlasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_qlasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_qsytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_qsytrf_rook + + !> DSYTRI: computes the inverse of a real symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> DSYTRF. + + pure subroutine stdlib_qsytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(qp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_qcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_qdot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_qcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_qdot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_qdot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_qcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_qdot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_qswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_qswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k DSYTRI_ROOK: computes the inverse of a real symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by DSYTRF_ROOK. + + pure subroutine stdlib_qsytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(qp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_qcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_qdot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_qcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_qdot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_qdot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_qcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_qsymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_qdot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_qswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_qswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_qswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_qswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_qswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_qswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k DSYTRS: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF. + + pure subroutine stdlib_qsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_qger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_qscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_qswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_qger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_qger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb,a( 1, k+1 ), 1, one, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k DSYTRS2: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF and converted by DSYCONV. + + pure subroutine stdlib_qsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_qswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_qtrsm('L','U','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_qscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_qtrsm('L','U','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_qswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_qtrsm('L','L','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_qscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_qtrsm('L','L','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_qsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_qsytrs2 + + !> DSYTRS_3: solves a system of linear equations A * X = B with a real + !> symmetric matrix A using the factorization computed + !> by DSYTRF_RK or DSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_qsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*), e(*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_qtrsm( 'L', 'U', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_qscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_qtrsm( 'L', 'U', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_qtrsm( 'L', 'L', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_qscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_qtrsm('L', 'L', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_qsytrs_3 + + !> DSYTRS_AA: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by DSYTRF_AA. + + pure subroutine stdlib_qsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_qtrsm('L', 'U', 'T', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b( 2, 1 ), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_qlacpy( 'F', 1, n, a( 1, 1 ), lda+1, work( n ), 1) + if( n>1 ) then + call stdlib_qlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_qlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_qgtsv( n, nrhs, work( 1 ), work( n ), work( 2*n ), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_qtrsm( 'L', 'U', 'N', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b( 2, 1 ), & + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**t \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_qtrsm( 'L', 'L', 'N', 'U', n-1, nrhs, one, a( 2, 1 ),lda, b( 2, 1 ), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_qlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_qlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_qlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_qgtsv( n, nrhs, work( 1 ), work(n), work( 2*n ), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute (l**t \ b) -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_qtrsm( 'L', 'L', 'T', 'U', n-1, nrhs, one, a( 2, 1 ),lda, b( 2, 1 ), & + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_qsytrs_aa + + !> DSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a real symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by DSYTRF_ROOK. + + pure subroutine stdlib_qsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_qger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_qscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_qswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_qger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_qger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ),& + ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, & + one, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, one, b( & + k, 1 ), ldb ) + call stdlib_qgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k+1 ), 1, one, & + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_qswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_qswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k DTBCON: estimates the reciprocal of the condition number of a + !> triangular band matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_qtbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_qlatbs( uplo, 'NO TRANSPOSE', diag, normin, n, kd,ab, ldab, work, & + scale, work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_qlatbs( uplo, 'TRANSPOSE', diag, normin, n, kd, ab,ldab, work, & + scale, work( 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iqamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale DTBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by DTBTRS or some other + !> means before entering this routine. DTBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_qtbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qtbsv( uplo, transt, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qtbsv( uplo, trans, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_qtbrfs + + !> DTBTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_qtbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(qp), intent(in) :: ab(ldab,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> DTFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_qtfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + real(qp), intent(in) :: alpha + ! Array Arguments + real(qp), intent(in) :: a(0:*) + real(qp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb DTFTRI: computes the inverse of a triangular matrix A stored in RFP + !> format. + !> This is a Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_qtftri( transr, uplo, diag, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo, diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_qtrtri( 'L', diag, n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_qtrmm( 'R', 'L', 'N', diag, n2, n1, -one, a( 0 ),n, a( n1 ), n ) + + call stdlib_qtrtri( 'U', diag, n2, a( n ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_qtrmm( 'L', 'U', 'T', diag, n2, n1, one, a( n ), n,a( n1 ), n ) + + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_qtrtri( 'L', diag, n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_qtrmm( 'L', 'L', 'T', diag, n1, n2, -one, a( n2 ),n, a( 0 ), n ) + + call stdlib_qtrtri( 'U', diag, n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_qtrmm( 'R', 'U', 'N', diag, n1, n2, one, a( n1 ),n, a( 0 ), n ) + + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_qtrtri( 'U', diag, n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_qtrmm( 'L', 'U', 'N', diag, n1, n2, -one, a( 0 ),n1, a( n1*n1 ), & + n1 ) + call stdlib_qtrtri( 'L', diag, n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_qtrmm( 'R', 'L', 'T', diag, n1, n2, one, a( 1 ),n1, a( n1*n1 ), & + n1 ) + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_qtrtri( 'U', diag, n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_qtrmm( 'R', 'U', 'T', diag, n2, n1, -one,a( n2*n2 ), n2, a( 0 ), & + n2 ) + call stdlib_qtrtri( 'L', diag, n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_qtrmm( 'L', 'L', 'N', diag, n2, n1, one,a( n1*n2 ), n2, a( 0 ), & + n2 ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_qtrtri( 'L', diag, k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_qtrmm( 'R', 'L', 'N', diag, k, k, -one, a( 1 ),n+1, a( k+1 ), n+1 & + ) + call stdlib_qtrtri( 'U', diag, k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_qtrmm( 'L', 'U', 'T', diag, k, k, one, a( 0 ), n+1,a( k+1 ), n+1 ) + + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_qtrtri( 'L', diag, k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_qtrmm( 'L', 'L', 'T', diag, k, k, -one, a( k+1 ),n+1, a( 0 ), n+1 & + ) + call stdlib_qtrtri( 'U', diag, k, a( k ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_qtrmm( 'R', 'U', 'N', diag, k, k, one, a( k ), n+1,a( 0 ), n+1 ) + + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_qtrtri( 'U', diag, k, a( k ), k, info ) + if( info>0 )return + call stdlib_qtrmm( 'L', 'U', 'N', diag, k, k, -one, a( k ), k,a( k*( k+1 ) ), & + k ) + call stdlib_qtrtri( 'L', diag, k, a( 0 ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_qtrmm( 'R', 'L', 'T', diag, k, k, one, a( 0 ), k,a( k*( k+1 ) ), & + k ) + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_qtrtri( 'U', diag, k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_qtrmm( 'R', 'U', 'T', diag, k, k, -one,a( k*( k+1 ) ), k, a( 0 ), & + k ) + call stdlib_qtrtri( 'L', diag, k, a( k*k ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_qtrmm( 'L', 'L', 'N', diag, k, k, one, a( k*k ), k,a( 0 ), k ) + + end if + end if + end if + return + end subroutine stdlib_qtftri + + !> DTFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_qtfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: ap(0:*) + real(qp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = arf( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_qtfttp + + !> DTFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_qtfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(qp), intent(out) :: a(0:lda-1,0:*) + real(qp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of real matrices (S,P), where S is a quasi-triangular matrix + !> and P is upper triangular. Matrix pairs of this type are produced by + !> the generalized Schur factorization of a matrix pair (A,B): + !> A = Q*S*Z**T, B = Q*P*Z**T + !> as computed by DGGHRD + DHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal blocks of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the orthogonal factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_qtgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(qp), intent(in) :: p(ldp,*), s(lds,*) + real(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: safety = 1.0e+2_qp + + ! Local Scalars + logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, & + lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, & + na, nw + real(qp) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, & + bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, & + salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale + ! Local Arrays + real(qp) :: bdiag(2), sum(2,2), sums(2,2), sump(2,2) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + ilall = .true. + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds1 )anorm = anorm + abs( s( 2, 1 ) ) + bnorm = abs( p( 1, 1 ) ) + work( 1 ) = zero + work( n+1 ) = zero + do j = 2, n + temp = zero + temp2 = zero + if( s( j, j-1 )==zero ) then + iend = j - 1 + else + iend = j - 2 + end if + do i = 1, iend + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + work( j ) = temp + work( n+j ) = temp2 + do i = iend + 1, min( j+1, n ) + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + anorm = max( anorm, temp ) + bnorm = max( bnorm, temp2 ) + end do + ascale = one / max( anorm, safmin ) + bscale = one / max( bnorm, safmin ) + ! left eigenvectors + if( compl ) then + ieig = 0 + ! main loop over eigenvalues + ilcplx = .false. + loop_220: do je = 1, n + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at. + if( ilcplx ) then + ilcplx = .false. + cycle loop_220 + end if + nw = 1 + if( je=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + temp = acoef*s( je+1, je ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je+1 ) = -temp2r / temp + work( 3*n+je+1 ) = -temp2i / temp + else + work( 2*n+je+1 ) = one + work( 3*n+je+1 ) = zero + temp = acoef*s( je, je+1 ) + work( 2*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je+1, je+1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je+1 ) & + )+abs( work( 3*n+je+1 ) ) ) + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! t + ! triangular solve of (a a - b b) y = 0 + ! t + ! (rowwise in (a a - b b) , or columnwise in (a a - b b) ) + il2by2 = .false. + loop_160: do j = je + nw, n + if( il2by2 ) then + il2by2 = .false. + cycle loop_160 + end if + na = 1 + bdiag( 1 ) = p( j, j ) + if( jbignum*xscale ) then + do jw = 0, nw - 1 + do jr = je, j - 1 + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute dot products + ! j-1 + ! sum = sum conjg( a*s(k,j) - b*p(k,j) )*x(k) + ! k=je + ! to reduce the op count, this is done as + ! _ j-1 _ j-1 + ! a*conjg( sum s(k,j)*x(k) ) - b*conjg( sum p(k,j)*x(k) ) + ! k=je k=je + ! which may cause underflow problems if a or b are close + ! to underflow. (e.g., less than small.) + do jw = 1, nw + do ja = 1, na + sums( ja, jw ) = zero + sump( ja, jw ) = zero + do jr = je, j - 1 + sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + end do + end do + end do + do ja = 1, na + if( ilcplx ) then + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) -bcoefi*sump( & + ja, 2 ) + sum( ja, 2 ) = -acoef*sums( ja, 2 ) +bcoefr*sump( ja, 2 ) +bcoefi*sump( & + ja, 1 ) + else + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) + end if + end do + ! t + ! solve ( a a - b b ) y = sum(,) + ! with scaling and perturbation of the denominator + call stdlib_qlaln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1 ), & + bdiag( 2 ), sum, 2, bcoefr,bcoefi, work( 2*n+j ), n, scale, temp,iinfo ) + + if( scalesafmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = ibeg, n + vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw ) + end do + end do + end if + ieig = ieig + nw - 1 + end do loop_220 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + ilcplx = .false. + loop_500: do je = n, 1, -1 + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at -- if complex, select(je) + ! or select(je-1). + ! if this is a complex pair, the 2-by-2 diagonal block + ! corresponding to the eigenvalue is in rows/columns je-1:je + if( ilcplx ) then + ilcplx = .false. + cycle loop_500 + end if + nw = 1 + if( je>1 ) then + if( s( je, je-1 )/=zero ) then + ilcplx = .true. + nw = 2 + end if + end if + if( ilall ) then + ilcomp = .true. + else if( ilcplx ) then + ilcomp = select( je ) .or. select( je-1 ) + else + ilcomp = select( je ) + end if + if( .not.ilcomp )cycle loop_500 + ! decide if (a) singular pencil, (b) real eigenvalue, or + ! (c) complex eigenvalue. + if( .not.ilcplx ) then + if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then + ! singular matrix pencil -- unit eigenvector + ieig = ieig - 1 + do jr = 1, n + vr( jr, ieig ) = zero + end do + vr( ieig, ieig ) = one + cycle loop_500 + end if + end if + ! clear vector + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+2 )*n+jr ) = zero + end do + end do + ! compute coefficients in ( a a - b b ) x = 0 + ! a is acoef + ! b is bcoefr + i*bcoefi + if( .not.ilcplx ) then + ! real eigenvalue + temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin & + ) + salfar = ( temp*s( je, je ) )*ascale + sbeta = ( temp*p( je, je ) )*bscale + acoef = sbeta*ascale + bcoefr = salfar*bscale + bcoefi = zero + ! scale to avoid underflow + scale = one + lsa = abs( sbeta )>=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + ! and contribution to sums + temp = acoef*s( je, je-1 ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je-1 ) = -temp2r / temp + work( 3*n+je-1 ) = -temp2i / temp + else + work( 2*n+je-1 ) = one + work( 3*n+je-1 ) = zero + temp = acoef*s( je-1, je ) + work( 2*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je-1, je-1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je-1 ) & + )+abs( work( 3*n+je-1 ) ) ) + ! compute contribution from columns je and je-1 + ! of a and b to the sums. + creala = acoef*work( 2*n+je-1 ) + cimaga = acoef*work( 3*n+je-1 ) + crealb = bcoefr*work( 2*n+je-1 ) -bcoefi*work( 3*n+je-1 ) + cimagb = bcoefi*work( 2*n+je-1 ) +bcoefr*work( 3*n+je-1 ) + cre2a = acoef*work( 2*n+je ) + cim2a = acoef*work( 3*n+je ) + cre2b = bcoefr*work( 2*n+je ) - bcoefi*work( 3*n+je ) + cim2b = bcoefi*work( 2*n+je ) + bcoefr*work( 3*n+je ) + do jr = 1, je - 2 + work( 2*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, & + je ) + cre2b*p( jr, je ) + work( 3*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, & + je ) + cim2b*p( jr, je ) + end do + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! columnwise triangular solve of (a a - b b) x = 0 + il2by2 = .false. + loop_370: do j = je - nw, 1, -1 + ! if a 2-by-2 block, is in position j-1:j, wait until + ! next iteration to process it (when it will be j:j+1) + if( .not.il2by2 .and. j>1 ) then + if( s( j, j-1 )/=zero ) then + il2by2 = .true. + cycle loop_370 + end if + end if + bdiag( 1 ) = p( j, j ) + if( il2by2 ) then + na = 2 + bdiag( 2 ) = p( j+1, j+1 ) + else + na = 1 + end if + ! compute x(j) (and x(j+1), if 2-by-2 block) + call stdlib_qlaln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1 ), & + bdiag( 2 ), work( 2*n+j ),n, bcoefr, bcoefi, sum, 2, scale, temp,iinfo ) + + if( scale1 ) then + ! check whether scaling is necessary for sum. + xscale = one / max( one, xmax ) + temp = acoefa*work( j ) + bcoefa*work( n+j ) + if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) ) + + temp = max( temp, acoefa, bcoefa ) + if( temp>bignum*xscale ) then + do jw = 0, nw - 1 + do jr = 1, je + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute the contributions of the off-diagonals of + ! column j (and j+1, if 2-by-2 block) of a and b to the + ! sums. + do ja = 1, na + if( ilcplx ) then + creala = acoef*work( 2*n+j+ja-1 ) + cimaga = acoef*work( 3*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) -bcoefi*work( 3*n+j+ja-1 ) + cimagb = bcoefi*work( 2*n+j+ja-1 ) +bcoefr*work( 3*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + work( 3*n+jr ) = work( 3*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(& + jr, j+ja-1 ) + end do + else + creala = acoef*work( 2*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + end do + end if + end do + end if + il2by2 = .false. + end do loop_370 + ! copy eigenvector to vr, back transforming if + ! howmny='b'. + ieig = ieig - nw + if( ilback ) then + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1 ) + end do + ! a series of compiler directives to defeat + ! vectorization for the next loop + do jc = 2, je + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )& + *vr( jr, jc ) + end do + end do + end do + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr ) + end do + end do + iend = n + else + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr ) + end do + end do + iend = je + end if + ! scale eigenvector + xmax = zero + if( ilcplx ) then + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) ) + end do + else + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) ) ) + end do + end if + if( xmax>safmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = 1, iend + vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw ) + end do + end do + end if + end do loop_500 + end if + return + end subroutine stdlib_qtgevc + + !> DTGEX2: swaps adjacent diagonal blocks (A11, B11) and (A22, B22) + !> of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair + !> (A, B) by an orthogonal equivalence transformation. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, & + work, lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2 + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_qcopy by calls to stdlib_qlaset, or by do + ! loops. sven hammarling, 1/5/02. + ! Parameters + real(qp), parameter :: twenty = 2.0e+01_qp + integer(ilp), parameter :: ldst = 4 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, idum, linfo, m + real(qp) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, & + scale, smlnum, thresha, threshb + ! Local Arrays + integer(ilp) :: iwork(ldst) + real(qp) :: ai(2), ar(2), be(2), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(& + ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(& + ldst,ldst) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 .or. n1<=0 .or. n2<=0 )return + if( n1>n .or. ( j1+n1 )>n )return + m = n1 + n2 + if( lwork=sb ) then + call stdlib_qlartg( s( 1, 1 ), s( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + else + call stdlib_qlartg( t( 1, 1 ), t( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + end if + call stdlib_qrot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + call stdlib_qrot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + li( 2, 2 ) = li( 1, 1 ) + li( 1, 2 ) = -li( 2, 1 ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) ) <= thresha .and.abs( t( 2, 1 ) ) <= threshb + if( .not.weak )go to 70 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_qlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_qgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_qgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_qlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_qlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_qgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_qgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_qlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + call stdlib_qrot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_qrot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_qrot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1, 1 ), li( 2, 1 & + ) ) + call stdlib_qrot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1, 1 ), li( 2, 1 & + ) ) + ! set n1-by-n2 (2,1) - blocks to zero. + a( j1+1, j1 ) = zero + b( j1+1, j1 ) = zero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_qrot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 & + ) ) + if( wantq )call stdlib_qrot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, li( 1, 1 ),li( 2, 1 & + ) ) + ! exit with info = 0 if swap was successfully performed. + return + else + ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2 + ! and 2-by-2 blocks. + ! solve the generalized sylvester equation + ! s11 * r - l * s22 = scale * s12 + ! t11 * r - l * t22 = scale * t12 + ! for r and l. solutions in li and ir. + call stdlib_qlacpy( 'FULL', n1, n2, t( 1, n1+1 ), ldst, li, ldst ) + call stdlib_qlacpy( 'FULL', n1, n2, s( 1, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst ) + + call stdlib_qtgsy2( 'N', 0, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),& + ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,& + linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix ql: + ! ql**t * li = [ tl ] + ! [ 0 ] + ! where + ! li = [ -l ] + ! [ scale * identity(n2) ] + do i = 1, n2 + call stdlib_qscal( n1, -one, li( 1, i ), 1 ) + li( n1+i, i ) = scale + end do + call stdlib_qgeqr2( m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_qorg2r( m, m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix rq: + ! ir * rq**t = [ 0 tr], + ! where ir = [ scale * identity(n1), r ] + do i = 1, n1 + ir( n2+i, i ) = scale + end do + call stdlib_qgerq2( n1, m, ir( n2+1, 1 ), ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_qorgr2( m, m, n1, ir, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + ! perform the swapping tentatively: + call stdlib_qgemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + call stdlib_qgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst ) + call stdlib_qgemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + call stdlib_qgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst ) + call stdlib_qlacpy( 'F', m, m, s, ldst, scpy, ldst ) + call stdlib_qlacpy( 'F', m, m, t, ldst, tcpy, ldst ) + call stdlib_qlacpy( 'F', m, m, ir, ldst, ircop, ldst ) + call stdlib_qlacpy( 'F', m, m, li, ldst, licop, ldst ) + ! triangularize the b-part by an rq factorization. + ! apply transformation (from left) to a-part, giving s. + call stdlib_qgerq2( m, m, t, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_qormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo ) + if( linfo/=0 )go to 70 + call stdlib_qormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo ) + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in brqa21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_qlassq( n1, s( n2+1, i ), 1, dscale, dsum ) + end do + brqa21 = dscale*sqrt( dsum ) + ! triangularize the b-part by a qr factorization. + ! apply transformation (from right) to a-part, giving s. + call stdlib_qgeqr2( m, m, tcpy, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_qorm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info ) + + call stdlib_qorm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info ) + + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in bqra21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_qlassq( n1, scpy( n2+1, i ), 1, dscale, dsum ) + end do + bqra21 = dscale*sqrt( dsum ) + ! decide which method to use. + ! weak stability test: + ! f-norm(s21) <= o(eps * f-norm((s))) + if( bqra21<=brqa21 .and. bqra21<=thresha ) then + call stdlib_qlacpy( 'F', m, m, scpy, ldst, s, ldst ) + call stdlib_qlacpy( 'F', m, m, tcpy, ldst, t, ldst ) + call stdlib_qlacpy( 'F', m, m, ircop, ldst, ir, ldst ) + call stdlib_qlacpy( 'F', m, m, licop, ldst, li, ldst ) + else if( brqa21>=thresha ) then + go to 70 + end if + ! set lower triangle of b-part to zero + call stdlib_qlaset( 'LOWER', m-1, m-1, zero, zero, t(2,1), ldst ) + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_qlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_qgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_qgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_qlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_qlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_qgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_qgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_qlassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! transformations and set n1-by-n2 (2,1)-block to zero. + call stdlib_qlaset( 'FULL', n1, n2, zero, zero, s(n2+1,1), ldst ) + ! copy back m-by-m diagonal block starting at index j1 of (a, b) + call stdlib_qlacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda ) + call stdlib_qlacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb ) + call stdlib_qlaset( 'FULL', ldst, ldst, zero, zero, t, ldst ) + ! standardize existing 2-by-2 blocks. + call stdlib_qlaset( 'FULL', m, m, zero, zero, work, m ) + work( 1 ) = one + t( 1, 1 ) = one + idum = lwork - m*m - 2 + if( n2>1 ) then + call stdlib_qlagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1 ), & + work( 2 ), t( 1, 1 ), t( 2, 1 ) ) + work( m+1 ) = -work( 2 ) + work( m+2 ) = work( 1 ) + t( n2, n2 ) = t( 1, 1 ) + t( 1, 2 ) = -t( 2, 1 ) + end if + work( m*m ) = one + t( m, m ) = one + if( n1>1 ) then + call stdlib_qlagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, & + work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) ) + + work( m*m ) = work( n2*m+n2+1 ) + work( m*m-1 ) = -work( n2*m+n2+2 ) + t( m, m ) = t( n2+1, n2+1 ) + t( m-1, m ) = -t( m, m-1 ) + end if + call stdlib_qgemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, & + work( m*m+1 ), n2 ) + call stdlib_qlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda ) + call stdlib_qgemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, & + work( m*m+1 ), n2 ) + call stdlib_qlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb ) + call stdlib_qgemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m & + ) + call stdlib_qlacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst ) + call stdlib_qgemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_qlacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda ) + call stdlib_qgemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_qlacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb ) + call stdlib_qgemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m ) + call stdlib_qlacpy( 'FULL', m, m, work, m, ir, ldst ) + ! accumulate transformations into q and z if requested. + if( wantq ) then + call stdlib_qgemm( 'N', 'N', n, m, m, one, q( 1, j1 ), ldq, li,ldst, zero, work, & + n ) + call stdlib_qlacpy( 'FULL', n, m, work, n, q( 1, j1 ), ldq ) + end if + if( wantz ) then + call stdlib_qgemm( 'N', 'N', n, m, m, one, z( 1, j1 ), ldz, ir,ldst, zero, work, & + n ) + call stdlib_qlacpy( 'FULL', n, m, work, n, z( 1, j1 ), ldz ) + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + i = j1 + m + if( i<=n ) then + call stdlib_qgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, & + work, m ) + call stdlib_qlacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda ) + call stdlib_qgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, & + work, m ) + call stdlib_qlacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb ) + end if + i = j1 - 1 + if( i>0 ) then + call stdlib_qgemm( 'N', 'N', i, m, m, one, a( 1, j1 ), lda, ir,ldst, zero, work, & + i ) + call stdlib_qlacpy( 'FULL', i, m, work, i, a( 1, j1 ), lda ) + call stdlib_qgemm( 'N', 'N', i, m, m, one, b( 1, j1 ), ldb, ir,ldst, zero, work, & + i ) + call stdlib_qlacpy( 'FULL', i, m, work, i, b( 1, j1 ), ldb ) + end if + ! exit with info = 0 if swap was successfully performed. + return + end if + ! exit with info = 1 if swap was rejected. + 70 continue + info = 1 + return + end subroutine stdlib_qtgex2 + + !> DTGEXC: reorders the generalized real Schur decomposition of a real + !> matrix pair (A,B) using an orthogonal equivalence transformation + !> (A, B) = Q * (A, B) * Z**T, + !> so that the diagonal block of (A, B) with row index IFST is moved + !> to row ILST. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_qtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldq, ldz, lwork, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: here, lwmin, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info==0 ) then + if( n<=1 ) then + lwmin = 1 + else + lwmin = 4*n + 16 + end if + work(1) = lwmin + if (lwork1 ) then + if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, nbf, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2-by-2 block breaks into two 1-by-1 blocks. + if( nbf==2 ) then + if( a( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1-by-1 blocks, each of which + ! must be swapped individually. + nbnext = 1 + if( here>=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, 1, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1-by-1 blocks. + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, & + nbnext, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + else + ! recompute nbnext in case of 2-by-2 split. + if( a( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2-by-2 block did not split. + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,& + 2, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2-by-2 block did split. + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + call stdlib_qtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + work( 1 ) = lwmin + return + end subroutine stdlib_qtgexc + + !> DTGSEN: reorders the generalized real Schur decomposition of a real + !> matrix pair (A, B) (in terms of an orthonormal equivalence trans- + !> formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the upper quasi-triangular + !> matrix A and the upper triangular B. The leading columns of Q and + !> Z form orthonormal bases of the corresponding left and right eigen- + !> spaces (deflating subspaces). (A, B) must be in generalized real + !> Schur canonical form (as returned by DGGES), i.e. A is block upper + !> triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper + !> triangular. + !> DTGSEN also computes the generalized eigenvalues + !> w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, DTGSEN computes the estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_qtgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, & + beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(qp), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2 + real(qp) :: dscale, dsum, eps, rdscal, smlnum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + pair = .false. + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + if( pair ) then + pair = .false. + else + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 60 + end if + if( pair )ks = ks + 1 + end if + end if + end do loop_30 + if( wantp ) then + ! solve generalized sylvester equation for r and l + ! and compute pl and pr. + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + call stdlib_qlacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_qlacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + call stdlib_qtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto left + ! and right eigenspaces. + rdscal = zero + dsum = one + call stdlib_qlassq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_qlassq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates of difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu-estimate. + call stdlib_qtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl-estimate. + call stdlib_qtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_qlacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_qlacn2( mn2, work( mn2+1 ), work, iwork, dif( 1 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_qtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_qtgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_qlacn2( mn2, work( mn2+1 ), work, iwork, dif( 2 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_qtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_qtgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + 60 continue + ! compute generalized eigenvalues of reordered pair (a, b) and + ! normalize the generalized schur form. + pair = .false. + loop_80: do k = 1, n + if( pair ) then + pair = .false. + else + if( k DTGSJA: computes the generalized singular value decomposition (GSVD) + !> of two real upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine DGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), + !> where U, V and Q are orthogonal matrices. + !> R is a nonsingular upper triangular matrix, and D1 and D2 are + !> ``diagonal'' matrices, which are of the following structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the orthogonal transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_qtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(qp), intent(in) :: tola, tolb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + real(qp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(qp), parameter :: hugenum = huge(zero) + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(qp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & + ssmin + ! Intrinsic Functions + intrinsic :: abs,max,min,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + ! change sign if necessary + if( gamma=beta( k+i ) ) then + call stdlib_qscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_qscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_qcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_qcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l DTGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B) in + !> generalized real Schur canonical form (or of any matrix pair + !> (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where + !> Z**T denotes the transpose of Z. + !> (A, B) must be in generalized real Schur form (as returned by DGGES), + !> i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal + !> blocks. B is upper triangular. + + pure subroutine stdlib_qtgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + real(qp), intent(out) :: dif(*), s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: difdri = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2 + real(qp) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, & + scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi + ! Local Arrays + real(qp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl((a11,b11), (a22, b22)). + n1 = 1 + if( work( 2 )/=zero )n1 = 2 + n2 = n - n1 + if( n2==0 ) then + dif( ks ) = cond + else + i = n*n + 1 + iz = 2*n*n + 1 + call stdlib_qtgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr ) + if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond ) + end if + end if + if( pair )dif( ks+1 ) = dif( ks ) + end if + if( pair )ks = ks + 1 + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_qtgsna + + !> DTGSY2: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F, + !> using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, + !> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, + !> N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) + !> must be in generalized Schur canonical form, i.e. A, B are upper + !> quasi triangular and D, E are upper triangular. The solution (R, L) + !> overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor + !> chosen to avoid overflow. + !> In matrix notation solving equation (1) corresponds to solve + !> Z*x = scale*b, where Z is defined as + !> Z = [ kron(In, A) -kron(B**T, Im) ] (2) + !> [ kron(In, D) -kron(E**T, Im) ], + !> Ik is the identity matrix of size k and X**T is the transpose of X. + !> kron(X, Y) is the Kronecker product between the matrices X and Y. + !> In the process of solving (1), we solve a number of such systems + !> where Dim(In), Dim(In) = 1 or 2. + !> If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y, + !> which is equivalent to solve for R and L in + !> A**T * R + D**T * L = scale * C (3) + !> R * B**T + L * E**T = scale * -F + !> This case is used to compute an estimate of Dif[(A, D), (B, E)] = + !> sigma_min(Z) using reverse communication with DLACON. + !> DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL + !> of an upper bound on the separation between to matrix pairs. Then + !> the input (A, D), (B, E) are sub-pencils of the matrix pair in + !> DTGSYL. See DTGSYL for details. + + pure subroutine stdlib_qtgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, rdsum, rdscal,iwork, pq, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n + integer(ilp), intent(out) :: info, pq + real(qp), intent(inout) :: rdscal, rdsum + real(qp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + real(qp), intent(inout) :: c(ldc,*), f(ldf,*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_qcopy by calls to stdlib_qlaset. + ! sven hammarling, 27/5/02. + ! Parameters + integer(ilp), parameter :: ldz = 8 + + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ie, ierr, ii, is, isp1, j, je, jj, js, jsp1, k, mb, nb, p, q, & + zdim + real(qp) :: alpha, scaloc + ! Local Arrays + integer(ilp) :: ipiv(ldz), jpiv(ldz) + real(qp) :: rhs(ldz), z(ldz,ldz) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input parameters + info = 0 + ierr = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>2 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( ldam )go to 20 + p = p + 1 + iwork( p ) = i + if( i==m )go to 20 + if( a( i+1, i )/=zero ) then + i = i + 2 + else + i = i + 1 + end if + go to 10 + 20 continue + iwork( p+1 ) = m + 1 + ! determine block structure of b + q = p + 1 + j = 1 + 30 continue + if( j>n )go to 40 + q = q + 1 + iwork( q ) = j + if( j==n )go to 40 + if( b( j+1, j )/=zero ) then + j = j + 2 + else + j = j + 1 + end if + go to 30 + 40 continue + iwork( q+1 ) = n + 1 + pq = p*( q-p-1 ) + if( notran ) then + ! solve (i, j) - subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q + scale = one + scaloc = one + loop_120: do j = p + 2, q + js = iwork( j ) + jsp1 = js + 1 + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_110: do i = p, 1, -1 + is = iwork( i ) + isp1 = is + 1 + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + zdim = mb*nb*2 + if( ( mb==1 ) .and. ( nb==1 ) ) then + ! build a 2-by-2 system z * x = rhs + z( 1, 1 ) = a( is, is ) + z( 2, 1 ) = d( is, is ) + z( 1, 2 ) = -b( js, js ) + z( 2, 2 ) = -e( js, js ) + ! set up right hand side(s) + rhs( 1 ) = c( is, js ) + rhs( 2 ) = f( is, js ) + ! solve z * x = rhs + call stdlib_qgetc2( zdim, z, ldz, ipiv, jpiv, ierr ) + if( ierr>0 )info = ierr + if( ijob==0 ) then + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + else + call stdlib_qlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + f( is, js ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + alpha = -rhs( 1 ) + call stdlib_qaxpy( is-1, alpha, a( 1, is ), 1, c( 1, js ),1 ) + call stdlib_qaxpy( is-1, alpha, d( 1, is ), 1, f( 1, js ),1 ) + end if + if( j0 )info = ierr + if( ijob==0 ) then + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + else + call stdlib_qlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( is, jsp1 ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( is, jsp1 ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_qger( is-1, nb, -one, a( 1, is ), 1, rhs( 1 ),1, c( 1, js ),& + ldc ) + call stdlib_qger( is-1, nb, -one, d( 1, is ), 1, rhs( 1 ),1, f( 1, js ),& + ldf ) + end if + if( j0 )info = ierr + if( ijob==0 ) then + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + else + call stdlib_qlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( isp1, js ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( isp1, js ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_qgemv( 'N', is-1, mb, -one, a( 1, is ), lda,rhs( 1 ), 1, & + one, c( 1, js ), 1 ) + call stdlib_qgemv( 'N', is-1, mb, -one, d( 1, is ), ldd,rhs( 1 ), 1, & + one, f( 1, js ), 1 ) + end if + if( j0 )info = ierr + if( ijob==0 ) then + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + else + call stdlib_qlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv ) + + end if + ! unpack solution vector(s) + k = 1 + ii = mb*nb + 1 + do jj = 0, nb - 1 + call stdlib_qcopy( mb, rhs( k ), 1, c( is, js+jj ), 1 ) + call stdlib_qcopy( mb, rhs( ii ), 1, f( is, js+jj ), 1 ) + k = k + mb + ii = ii + mb + end do + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_qgemm( 'N', 'N', is-1, nb, mb, -one,a( 1, is ), lda, rhs( 1 & + ), mb, one,c( 1, js ), ldc ) + call stdlib_qgemm( 'N', 'N', is-1, nb, mb, -one,d( 1, is ), ldd, rhs( 1 & + ), mb, one,f( 1, js ), ldf ) + end if + if( j0 )info = ierr + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + f( is, js ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + alpha = rhs( 1 ) + call stdlib_qaxpy( js-1, alpha, b( 1, js ), 1, f( is, 1 ),ldf ) + alpha = rhs( 2 ) + call stdlib_qaxpy( js-1, alpha, e( 1, js ), 1, f( is, 1 ),ldf ) + end if + if( i

0 )info = ierr + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( is, jsp1 ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( is, jsp1 ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_qaxpy( js-1, rhs( 1 ), b( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_qaxpy( js-1, rhs( 2 ), b( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + call stdlib_qaxpy( js-1, rhs( 3 ), e( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_qaxpy( js-1, rhs( 4 ), e( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + end if + if( i

0 )info = ierr + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( isp1, js ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( isp1, js ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_qger( mb, js-1, one, rhs( 1 ), 1, b( 1, js ),1, f( is, 1 ), & + ldf ) + call stdlib_qger( mb, js-1, one, rhs( 3 ), 1, e( 1, js ),1, f( is, 1 ), & + ldf ) + end if + if( i

0 )info = ierr + call stdlib_qgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + k = 1 + ii = mb*nb + 1 + do jj = 0, nb - 1 + call stdlib_qcopy( mb, rhs( k ), 1, c( is, js+jj ), 1 ) + call stdlib_qcopy( mb, rhs( ii ), 1, f( is, js+jj ), 1 ) + k = k + mb + ii = ii + mb + end do + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_qgemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1, & + js ), ldb, one,f( is, 1 ), ldf ) + call stdlib_qgemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1, & + js ), lde, one,f( is, 1 ), ldf ) + end if + if( i

DTGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with real entries. (A, D) and (B, E) must be in + !> generalized (real) Schur canonical form, i.e. A, B are upper quasi + !> triangular and D, E are upper triangular. + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale b, where + !> Z is defined as + !> Z = [ kron(In, A) -kron(B**T, Im) ] (2) + !> [ kron(In, D) -kron(E**T, Im) ]. + !> Here Ik is the identity matrix of size k and X**T is the transpose of + !> X. kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'T', DTGSYL solves the transposed system Z**T*y = scale*b, + !> which is equivalent to solve for R and L in + !> A**T * R + D**T * L = scale * C (3) + !> R * B**T + L * E**T = scale * -F + !> This case (TRANS = 'T') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using DLACON. + !> If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate + !> of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. See [1-2] for more + !> information. + !> This is a level 3 BLAS algorithm. + + pure subroutine stdlib_qtgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(qp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + real(qp), intent(inout) :: c(ldc,*), f(ldf,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_qcopy by calls to stdlib_qlaset. + ! sven hammarling, 1/5/02. + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, ppqq, pq, q + real(qp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: real,max,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_qlaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_qlaset( 'F', m, n, zero, zero, f, ldf ) + else if( ijob>=1 ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + loop_30: do iround = 1, isolve + ! use unblocked level 2 solver + dscale = zero + dsum = one + pq = 0 + call stdlib_qtgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,iwork, pq, info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=qp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=qp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_qlacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_qlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_qlaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_qlaset( 'F', m, n, zero, zero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_qlacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_qlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + if( a( i, i-1 )/=zero )i = i + 1 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + if( b( j, j-1 )/=zero )j = j + 1 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j)-subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1,..., 1; j = 1, 2,..., q + dscale = zero + dsum = one + pq = 0 + scale = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + ppqq = 0 + call stdlib_qtgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo ) + if( linfo>0 )info = linfo + pq = pq + ppqq + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_qscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_qscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_qscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_qgemm( 'N', 'N', is-1, nb, mb, -one,a( 1, is ), lda, c( is, & + js ), ldc, one,c( 1, js ), ldc ) + call stdlib_qgemm( 'N', 'N', is-1, nb, mb, -one,d( 1, is ), ldd, c( is, & + js ), ldc, one,f( 1, js ), ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_qscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_qscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_qscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_qscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_qscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining equation. + if( j>p+2 ) then + call stdlib_qgemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1, js )& + , ldb, one, f( is, 1 ),ldf ) + call stdlib_qgemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1, js )& + , lde, one, f( is, 1 ),ldf ) + end if + if( i

DTPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_qtpcon( norm, uplo, diag, n, ap, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ap(*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_qlamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=qp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_qlantp( norm, uplo, diag, n, ap, work ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_qlatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_qlatps( uplo, 'TRANSPOSE', diag, normin, n, ap,work, scale, work( & + 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iqamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale DTPLQT: computes a blocked LQ factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_qtplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_qtplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_qtprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_qtplqt + + !> DTPLQT2: computes a LQ a factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_qtplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda DTPMQRT applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_qtpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + real(qp), intent(in) :: v(ldv,*), t(ldt,*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_qtprfb( 'L', 'T', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_qtprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_qtprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_qtprfb( 'R', 'T', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_qtpmlqt + + !> DTPMQRT: applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_qtpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + real(qp), intent(in) :: v(ldv,*), t(ldt,*) + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_qtprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_qtprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_qtprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_qtprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_qtpmqrt + + !> DTPQRT: computes a blocked QR factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_qtpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_qtpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_qtprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_qtpqrt + + !> DTPQRT2: computes a QR factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_qtpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda DTPRFB: applies a real "triangular-pentagonal" block reflector H or its + !> transpose H**T to a real matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_qtprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*), b(ldb,*) + real(qp), intent(in) :: t(ldt,*), v(ldv,*) + real(qp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h**t c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - t (a + v**t b) or a = a - t**t (a + v**t b) + ! b = b - v t (a + v**t b) or b = b - v t**t (a + v**t b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_qgemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork ) + + call stdlib_qgemm( 'T', 'N', k-l, n, m, one, v( 1, kp ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_qgemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_qtrmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h**t where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - (a + b v) t or a = a - (a + b v) t**t + ! b = b - (a + b v) t v**t or b = b - (a + b v) t**t v**t + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_qtrmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_qgemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork ) + + call stdlib_qgemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1, kp ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_qgemm( 'N', 'T', m, l, k-l, -one, work( 1, kp ), ldwork,v( np, kp ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_qtrmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h**t c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - t (a + v**t b) or a = a - t**t (a + v**t b) + ! b = b - v t (a + v**t b) or b = b - v t**t (a + v**t b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_qgemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_qgemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'N', m-l, n, k, -one, v( mp, 1 ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_qgemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_qtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h**t where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w**t or h**t = i - w t**t w**t + ! a = a - (a + b v) t or a = a - (a + b v) t**t + ! b = b - (a + b v) t v**t or b = b - (a + b v) t**t v**t + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_qgemm( 'N', 'N', m, l, n-l, one, b( 1, np ), ldb,v( np, kp ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_qgemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1 ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_qgemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_qtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**t c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - t (a + v b) or a = a - t**t (a + v b) + ! b = b - v**t t (a + v b) or b = b - v**t t**t (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, mp ), ldv,work, ldb ) + + call stdlib_qgemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork ) + + call stdlib_qgemm( 'N', 'N', k-l, n, m, one, v( kp, 1 ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_qgemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_qtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**t where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - (a + b v**t) t or a = a - (a + b v**t) t**t + ! b = b - (a + b v**t) t v or b = b - (a + b v**t) t**t v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_qtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + call stdlib_qgemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork ) + + call stdlib_qgemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1 ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_qgemm( 'N', 'N', m, l, k-l, -one, work( 1, kp ), ldwork,v( kp, np ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_qtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**t c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - t (a + v b) or a = a - t**t (a + v b) + ! b = b - v**t t (a + v b) or b = b - v**t t**t (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_qgemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_qgemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'T', 'N', m-l, n, k, -one, v( 1, mp ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_qgemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_qtrmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**t where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w**t t w or h**t = i - w**t t**t w + ! a = a - (a + b v**t) t or a = a - (a + b v**t) t**t + ! b = b - (a + b v**t) t v or b = b - (a + b v**t) t**t v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_qgemm( 'N', 'T', m, l, n-l, one, b( 1, np ), ldb,v( kp, np ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_qgemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_qtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_qgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1, np ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_qgemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_qtrmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_qtprfb + + !> DTPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by DTPTRS or some other + !> means before entering this routine. DTPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_qtprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, kc, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qtpsv( uplo, transt, diag, n, ap, work( n+1 ), 1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qtpsv( uplo, trans, diag, n, ap, work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_qtprfs + + !> DTPTRI: computes the inverse of a real upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_qtptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + real(qp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==zero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==zero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = one / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -one + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_qtpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_qscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = one / ap( jc ) + ajj = -ap( jc ) + else + ajj = -one + end if + if( j DTPTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_qtptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(qp), intent(in) :: ap(*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb DTPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_qtpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: ap(0:*) + real(qp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = ap( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! n is odd, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is odd, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! n is odd, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is odd, transr = 't', and uplo = 'u' + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! n is even, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is even, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! n is even, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is even, transr = 't', and uplo = 'u' + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_qtpttf + + !> DTPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_qtpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(qp), intent(out) :: a(lda,*) + real(qp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DTRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_qtrcon( norm, uplo, diag, n, a, lda, rcond, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_qlacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_qlatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_qlatrs( uplo, 'TRANSPOSE', diag, normin, n, a, lda,work, scale, & + work( 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iqamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale DTREVC: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + + pure subroutine stdlib_qtrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(qp), intent(in) :: t(ldt,*) + real(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, n2 + real(qp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(qp) :: x(2,2) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldtjnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_qlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_qscal( ki, scale, work( 1+n ), 1 ) + work( j+n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_qaxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_qlaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_qscal( ki, scale, work( 1+n ), 1 ) + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_qaxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_qaxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_qcopy( ki, work( 1+n ), 1, vr( 1, is ), 1 ) + ii = stdlib_iqamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_qscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else + if( ki>1 )call stdlib_qgemv( 'N', n, ki-1, one, vr, ldvr,work( 1+n ), 1, & + work( ki+n ),vr( 1, ki ), 1 ) + ii = stdlib_iqamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_qscal( n, remax, vr( 1, ki ), 1 ) + end if + else + ! complex right eigenvector. + ! initial solve + ! [ (t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i* wi)]*x = 0. + ! [ (t(ki,ki-1) t(ki,ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1+n ) = one + work( ki+n2 ) = wi / t( ki-1, ki ) + else + work( ki-1+n ) = -wi / t( ki, ki-1 ) + work( ki+n2 ) = one + end if + work( ki+n ) = zero + work( ki-1+n2 ) = zero + ! form right-hand side + do k = 1, ki - 2 + work( k+n ) = -work( ki-1+n )*t( k, ki-1 ) + work( k+n2 ) = -work( ki+n2 )*t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! (t(1:ki-2,1:ki-2) - (wr+i*wi))*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_qlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr, wi,x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( ki, scale, work( 1+n ), 1 ) + call stdlib_qscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_qaxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_qaxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_qlaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, wi, x, 2, scale,xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( ki, scale, work( 1+n ), 1 ) + call stdlib_qscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + work( j-1+n2 ) = x( 1, 2 ) + work( j+n2 ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_qaxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_qaxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_qaxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+n2 ), 1 ) + + call stdlib_qaxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_qcopy( ki, work( 1+n ), 1, vr( 1, is-1 ), 1 ) + call stdlib_qcopy( ki, work( 1+n2 ), 1, vr( 1, is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_qscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_qscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else + if( ki>2 ) then + call stdlib_qgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n ), 1, work( ki-& + 1+n ),vr( 1, ki-1 ), 1 ) + call stdlib_qgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n2 ), 1, work( & + ki+n2 ),vr( 1, ki ), 1 ) + else + call stdlib_qscal( n, work( ki-1+n ), vr( 1, ki-1 ), 1 ) + call stdlib_qscal( n, work( ki+n2 ), vr( 1, ki ), 1 ) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_qscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_qscal( n, remax, vr( 1, ki ), 1 ) + end if + end if + is = is - 1 + if( ip/=0 )is = is - 1 + 130 continue + if( ip==1 )ip = 0 + if( ip==-1 )ip = 1 + end do loop_140 + end if + if( leftv ) then + ! compute left eigenvectors. + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==-1 )go to 250 + if( ki==n )go to 150 + if( t( ki+1, ki )==zero )go to 150 + ip = 1 + 150 continue + if( somev ) then + if( .not.select( ki ) )go to 250 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! real left eigenvector. + work( ki+n ) = one + ! form right-hand side + do k = ki + 1, n + work( k+n ) = -t( ki, k ) + end do + ! solve the quasi-triangular system: + ! (t(ki+1:n,ki+1:n) - wr)**t*x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_qdot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + ! solve (t(j,j)-wr)**t*x = work + call stdlib_qlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_qscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + vmax = max( abs( work( j+n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_qdot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_qdot( j-ki-1, t( ki+1, j+1 ), 1,& + work( ki+1+n ), 1 ) + ! solve + ! [t(j,j)-wr t(j,j+1) ]**t * x = scale*( work1 ) + ! [t(j+1,j) t(j+1,j+1)-wr] ( work2 ) + call stdlib_qlaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_qscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + work( j+1+n ) = x( 2, 1 ) + vmax = max( abs( work( j+n ) ),abs( work( j+1+n ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_qcopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + ii = stdlib_iqamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_qscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki+n ) = wi / t( ki, ki+1 ) + work( ki+1+n2 ) = one + else + work( ki+n ) = one + work( ki+1+n2 ) = -wi / t( ki+1, ki ) + end if + work( ki+1+n ) = zero + work( ki+n2 ) = zero + ! form right-hand side + do k = ki + 2, n + work( k+n ) = -work( ki+n )*t( ki, k ) + work( k+n2 ) = -work( ki+1+n2 )*t( ki+1, k ) + end do + ! solve complex quasi-triangular system: + ! ( t(ki+2,n:ki+2,n) - (wr-i*wi) )*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_qscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_qdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_qdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + ! solve (t(j,j)-(wr-i*wi))*(x11+i*x12)= wk+i*wk2 + call stdlib_qlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_qscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + vmax = max( abs( work( j+n ) ),abs( work( j+n2 ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_qscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_qdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_qdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_qdot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n ), 1 ) + work( j+1+n2 ) = work( j+1+n2 ) -stdlib_qdot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n2 ), 1 ) + ! solve 2-by-2 complex linear equation + ! ([t(j,j) t(j,j+1) ]**t-(wr-i*wi)*i)*x = scale*b + ! ([t(j+1,j) t(j+1,j+1)] ) + call stdlib_qlaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_qscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + work( j+1+n ) = x( 2, 1 ) + work( j+1+n2 ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_qcopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + call stdlib_qcopy( n-ki+1, work( ki+n2 ), 1, vl( ki, is+1 ),1 ) + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_qscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_qscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else + if( ki DTREVC3: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**T)*T = w*(y**T) + !> where y**T denotes the transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_qtrevc3( side, howmny, select, n, t, ldt, vl, ldvl,vr, ldvr, mm, m, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(qp), intent(in) :: t(ldt,*) + real(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, iv, maxwrk, nb, & + ki2 + real(qp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(qp) :: x(2,2) + integer(ilp) :: iscomplex(nbmax) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + nb = stdlib_ilaenv( 1, 'DTREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + lquery = ( lwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_qlaset( 'F', n, 1+2*nb, zero, zero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_qlabad( unfl, ovfl ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + bignum = ( one-ulp ) / smlnum + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = zero + do i = 1, j - 1 + work( j ) = work( j ) + abs( t( i, j ) ) + end do + end do + ! index ip is used to specify the real or complex eigenvalue: + ! ip = 0, real eigenvalue, + ! 1, first of conjugate complex pair: (wr,wi) + ! -1, second of conjugate complex pair: (wr,wi) + ! iscomplex array stores ip for each column in current block. + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! for complex right vector, uses iv-1 for real part and iv for complex part. + ! non-blocked version always uses iv=2; + ! blocked version starts with iv=nb, goes down to 1 or 2. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 2 + if( nb>2 ) then + iv = nb + end if + ip = 0 + is = m + loop_140: do ki = n, 1, -1 + if( ip==-1 ) then + ! previous iteration (ki+1) was second of conjugate pair, + ! so this ki is first of conjugate pair; skip to end of loop + ip = 1 + cycle loop_140 + else if( ki==1 ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki, ki-1 )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is second of conjugate pair + ip = -1 + end if + if( somev ) then + if( ip==0 ) then + if( .not.select( ki ) )cycle loop_140 + else + if( .not.select( ki-1 ) )cycle loop_140 + end if + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki-1 ) ) )*sqrt( abs( t( ki-1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real right eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-1,1:ki-1) - wr ]*x = scale*work. + jnxt = ki - 1 + loop_60: do j = ki - 1, 1, -1 + if( j>jnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_qlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_qscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_qaxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_qlaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+iv*n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_qscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j-1+iv*n ) = x( 1, 1 ) + work( j +iv*n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_qaxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+iv*n ), 1 ) + + call stdlib_qaxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_qcopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_iqamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_qscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_qgemv( 'N', n, ki-1, one, vr, ldvr,work( 1 + iv*n ), & + 1, work( ki + iv*n ),vr( 1, ki ), 1 ) + ii = stdlib_iqamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_qscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = zero + end do + iscomplex( iv ) = ip + ! back-transform and normalization is done below + end if + else + ! -------------------------------------------------------- + ! complex right eigenvector. + ! initial solve + ! [ ( t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i*wi) ]*x = 0. + ! [ ( t(ki, ki-1) t(ki, ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1 + (iv-1)*n ) = one + work( ki + (iv )*n ) = wi / t( ki-1, ki ) + else + work( ki-1 + (iv-1)*n ) = -wi / t( ki, ki-1 ) + work( ki + (iv )*n ) = one + end if + work( ki + (iv-1)*n ) = zero + work( ki-1 + (iv )*n ) = zero + ! form right-hand side. + do k = 1, ki - 2 + work( k+(iv-1)*n ) = -work( ki-1+(iv-1)*n )*t(k,ki-1) + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(k,ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-2,1:ki-2) - (wr+i*wi) ]*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_qlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+(iv-1)*n ), n,wr, wi, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_qscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j+(iv-1)*n ) = x( 1, 1 ) + work( j+(iv )*n ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_qaxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), 1 ) + + call stdlib_qaxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_qlaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+(iv-1)*n ), n, wr, wi, x, 2,scale, xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_qscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j-1+(iv-1)*n ) = x( 1, 1 ) + work( j +(iv-1)*n ) = x( 2, 1 ) + work( j-1+(iv )*n ) = x( 1, 2 ) + work( j +(iv )*n ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_qaxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+(iv-1)*n ),& + 1 ) + call stdlib_qaxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), & + 1 ) + call stdlib_qaxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+(iv )*n ), & + 1 ) + call stdlib_qaxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_qcopy( ki, work( 1+(iv-1)*n ), 1, vr(1,is-1), 1 ) + call stdlib_qcopy( ki, work( 1+(iv )*n ), 1, vr(1,is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_qscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_qscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>2 ) then + call stdlib_qgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv-1)*n ), & + 1,work( ki-1 + (iv-1)*n ), vr(1,ki-1), 1) + call stdlib_qgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv)*n ), 1,& + work( ki + (iv)*n ), vr( 1, ki ), 1 ) + else + call stdlib_qscal( n, work(ki-1+(iv-1)*n), vr(1,ki-1), 1) + call stdlib_qscal( n, work(ki +(iv )*n), vr(1,ki ), 1) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_qscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_qscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + (iv-1)*n ) = zero + work( k + (iv )*n ) = zero + end do + iscomplex( iv-1 ) = -ip + iscomplex( iv ) = ip + iv = iv - 1 + ! back-transform and normalization is done below + end if + end if + if( nb>1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki-1 and ki) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki - 1 + end if + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv<=2) .or. (ki2==1) ) then + call stdlib_qgemm( 'N', 'N', n, nb-iv+1, ki2+nb-iv, one,vr, ldvr,work( 1 + & + (iv)*n ), n,zero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + if( iscomplex(k)==0 ) then + ! real eigenvector + ii = stdlib_iqamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1 ) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_qscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_qlacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki2 ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if ! blocked back-transform + is = is - 1 + if( ip/=0 )is = is - 1 + end do loop_140 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! for complex left vector, uses iv for real part and iv+1 for complex part. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb-1 or nb. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 1 + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==1 ) then + ! previous iteration (ki-1) was first of conjugate pair, + ! so this ki is second of conjugate pair; skip to end of loop + ip = -1 + cycle loop_260 + else if( ki==n ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki+1, ki )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is first of conjugate pair + ip = 1 + end if + if( somev ) then + if( .not.select( ki ) )cycle loop_260 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real left eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -t( ki, k ) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+1:n,ki+1:n) - wr ]**t * x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_qdot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + ! solve [ t(j,j) - wr ]**t * x = work + call stdlib_qlaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_qscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + vmax = max( abs( work( j+iv*n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_qdot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + work( j+1+iv*n ) = work( j+1+iv*n ) -stdlib_qdot( j-ki-1, t( ki+1, j+1 )& + , 1,work( ki+1+iv*n ), 1 ) + ! solve + ! [ t(j,j)-wr t(j,j+1) ]**t * x = scale*( work1 ) + ! [ t(j+1,j) t(j+1,j+1)-wr ] ( work2 ) + call stdlib_qlaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_qscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j +iv*n ) = x( 1, 1 ) + work( j+1+iv*n ) = x( 2, 1 ) + vmax = max( abs( work( j +iv*n ) ),abs( work( j+1+iv*n ) ), vmax ) + + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_qcopy( n-ki+1, work( ki + iv*n ), 1,vl( ki, is ), 1 ) + ii = stdlib_iqamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_qscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki + (iv )*n ) = wi / t( ki, ki+1 ) + work( ki+1 + (iv+1)*n ) = one + else + work( ki + (iv )*n ) = one + work( ki+1 + (iv+1)*n ) = -wi / t( ki+1, ki ) + end if + work( ki+1 + (iv )*n ) = zero + work( ki + (iv+1)*n ) = zero + ! form right-hand side. + do k = ki + 2, n + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(ki, k) + work( k+(iv+1)*n ) = -work( ki+1+(iv+1)*n )*t(ki+1,k) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+2:n,ki+2:n)**t - (wr-i*wi) ]*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_qscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j+(iv )*n ) = work( j+(iv)*n ) -stdlib_qdot( j-ki-2, t( ki+2, j )& + , 1,work( ki+2+(iv)*n ), 1 ) + work( j+(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_qdot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve [ t(j,j)-(wr-i*wi) ]*(x11+i*x12)= wk+i*wk2 + call stdlib_qlaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_qscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j+(iv )*n ) = x( 1, 1 ) + work( j+(iv+1)*n ) = x( 1, 2 ) + vmax = max( abs( work( j+(iv )*n ) ),abs( work( j+(iv+1)*n ) ), vmax ) + + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_qscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_qscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j +(iv )*n ) = work( j+(iv)*n ) -stdlib_qdot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv)*n ), 1 ) + work( j +(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_qdot( j-ki-2, t( ki+2,& + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + work( j+1+(iv )*n ) = work( j+1+(iv)*n ) -stdlib_qdot( j-ki-2, t( ki+2,& + j+1 ), 1,work( ki+2+(iv)*n ), 1 ) + work( j+1+(iv+1)*n ) = work( j+1+(iv+1)*n ) -stdlib_qdot( j-ki-2, t( ki+& + 2, j+1 ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve 2-by-2 complex linear equation + ! [ (t(j,j) t(j,j+1) )**t - (wr-i*wi)*i ]*x = scale*b + ! [ (t(j+1,j) t(j+1,j+1)) ] + call stdlib_qlaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_qscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_qscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j +(iv )*n ) = x( 1, 1 ) + work( j +(iv+1)*n ) = x( 1, 2 ) + work( j+1+(iv )*n ) = x( 2, 1 ) + work( j+1+(iv+1)*n ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ),vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_qcopy( n-ki+1, work( ki + (iv )*n ), 1,vl( ki, is ), 1 ) + + call stdlib_qcopy( n-ki+1, work( ki + (iv+1)*n ), 1,vl( ki, is+1 ), 1 ) + + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_qscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_qscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki and ki+1) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki + 1 + end if + ! columns 1:iv of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv>=nb-1) .or. (ki2==n) ) then + call stdlib_qgemm( 'N', 'N', n, iv, n-ki2+iv, one,vl( 1, ki2-iv+1 ), ldvl,& + work( ki2-iv+1 + (1)*n ), n,zero,work( 1 + (nb+1)*n ), n ) + ! normalize vectors + do k = 1, iv + if( iscomplex(k)==0) then + ! real eigenvector + ii = stdlib_iqamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_qscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_qlacpy( 'F', n, iv,work( 1 + (nb+1)*n ), n,vl( 1, ki2-iv+1 ), & + ldvl ) + iv = 1 + else + iv = iv + 1 + end if + end if ! blocked back-transform + is = is + 1 + if( ip/=0 )is = is + 1 + end do loop_260 + end if + return + end subroutine stdlib_qtrevc3 + + !> DTREXC: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + !> moved to row ILST. + !> The real Schur form T is reordered by an orthogonal similarity + !> transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + !> is updated by postmultiplying it with Z. + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_qtrexc( compq, n, t, ldt, q, ldq, ifst, ilst, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldt, n + ! Array Arguments + real(qp), intent(inout) :: q(ldq,*), t(ldt,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: here, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test the input arguments. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.wantq .and. .not.stdlib_lsame( compq, 'N' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'DTREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + ! determine the first row of specified block + ! and find out it is 1 by 1 or 2 by 2. + if( ifst>1 ) then + if( t( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( t( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,nbf, work, & + info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2 by 2 block breaks into two 1 by 1 blocks + if( nbf==2 ) then + if( t( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1 by 1 blocks each of which + ! must be swapped individually + nbnext = 1 + if( here>=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,1, work, info ) + + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1 by 1 blocks, no problems possible + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here, nbnext, 1,work, info ) + + here = here - 1 + else + ! recompute nbnext in case 2 by 2 split + if( t( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2 by 2 block did not split + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here-1, 2, 1,work, info ) + + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2 by 2 block did split + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here, 1, 1,work, info ) + + call stdlib_qlaexc( wantq, n, t, ldt, q, ldq, here-1, 1, 1,work, info ) + + here = here - 2 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + return + end subroutine stdlib_qtrexc + + !> DTRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by DTRTRS or some other + !> means before entering this routine. DTRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_qtrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + real(qp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_qlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_qlacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_qtrsv( uplo, transt, diag, n, a, lda, work( n+1 ),1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_qtrsv( uplo, trans, diag, n, a, lda, work( n+1 ),1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_qtrrfs + + !> DTRSEN: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in + !> the leading diagonal blocks of the upper quasi-triangular matrix T, + !> and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_qtrsen( job, compq, select, n, t, ldt, q, ldq, wr, wi,m, s, sep, work, & + lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, liwork, lwork, n + real(qp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: q(ldq,*), t(ldt,*) + real(qp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, kk, ks, liwmin, lwmin, n1, n2, nn + real(qp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt DTRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a real upper + !> quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q + !> orthogonal). + !> T must be in Schur canonical form (as returned by DHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_qtrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm, m, & + work, ldwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: s(*), sep(*), work(ldwork,*) + real(qp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: pair, somcon, wantbh, wants, wantsp + integer(ilp) :: i, ierr, ifst, ilst, j, k, kase, ks, n2, nn + real(qp) :: bignum, cond, cs, delta, dumm, eps, est, lnrm, mu, prod, prod1, prod2, & + rnrm, scale, smlnum, sn + ! Local Arrays + integer(ilp) :: isave(3) + real(qp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt DTRSYL: solves the real Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**T, and A and B are both upper quasi- + !> triangular. A is M-by-M and B is N-by-N; the right hand side C and + !> the solution X are M-by-N; and scale is an output scale factor, set + !> <= 1 to avoid overflow in X. + !> A and B must be in Schur canonical form (as returned by DHSEQR), that + !> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; + !> each 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_qtrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(qp), intent(out) :: scale + ! Array Arguments + real(qp), intent(in) :: a(lda,*), b(ldb,*) + real(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: ierr, j, k, k1, k2, knext, l, l1, l2, lnext + real(qp) :: a11, bignum, da11, db, eps, scaloc, sgn, smin, smlnum, suml, sumr, & + xnorm + ! Local Arrays + real(qp) :: dum(1), vec(2,2), x(2,2) + ! Intrinsic Functions + intrinsic :: abs,real,max,min + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'T' ) .and. .not.stdlib_lsame( trana, & + 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'T' ) .and. .not.stdlib_lsame( & + tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaknext )cycle loop_50 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_qlaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_qlaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_qlasy2( .false., .false., isgn, 2, 2,a( k1, k1 ), lda, b( l1, & + l1 ), ldb, vec,2, scaloc, x, 2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_50 + end do loop_60 + else if( .not.notrna .and. notrnb ) then + ! solve a**t *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 t l-1 + ! r(k,l) = sum [a(i,k)**t*x(i,l)] +isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = 1 + loop_120: do l = 1, n + if( lone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_qlaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_qlaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_qlasy2( .true., .false., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_110 + end do loop_120 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**t*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! top-right corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! k-1 n + ! r(k,l) = sum [a(i,k)**t*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_180: do l = n, 1, -1 + if( l>lnext )cycle loop_180 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = 1 + loop_170: do k = 1, m + if( kone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_qlaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_qlaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_qlasy2( .true., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, l1 & + ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_170 + end do loop_180 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! bottom-right corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=k+1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_240: do l = n, 1, -1 + if( l>lnext )cycle loop_240 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = m + loop_230: do k = m, 1, -1 + if( k>knext )cycle loop_230 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l1, c( k1, min( l1+1, n ) ), ldc,b( l1, min( l1+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_qlaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_qdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_qlaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_qdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_qdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_qlasy2( .false., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_qscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_230 + end do loop_240 + end if + return + end subroutine stdlib_qtrsyl + + !> DTRTI2: computes the inverse of a real upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_qtrti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTRTRI: computes the inverse of a real upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_qtrtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_qtrti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_qtrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, one, a, lda,& + a( 1, j ), lda ) + call stdlib_qtrsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -one, a( j,& + j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_qtrti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_qtrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, one,& + a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_qtrsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + one, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_qtrti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_qtrtri + + !> DTRTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_qtrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda DTRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_qtrttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(qp), intent(in) :: a(0:lda-1,0:*) + real(qp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda DTRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_qtrttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda DTZRZF: reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A + !> to upper triangular form by means of orthogonal transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_qtzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_qlarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_qlarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_qlatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_qtzrzf + + !> DZSUM1: takes the sum of the absolute values of a complex + !> vector and returns a quad precision result. + !> Based on DZASUM from the Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure real(qp) function stdlib_qzsum1( n, cx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + real(qp) :: stemp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_qzsum1 = zero + stemp = zero + if( n<=0 )return + if( incx==1 )go to 20 + ! code for increment not equal to 1 + nincx = n*incx + do i = 1, nincx, incx + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_qzsum1 = stemp + return + ! code for increment equal to 1 + 20 continue + do i = 1, n + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_qzsum1 = stemp + return + end function stdlib_qzsum1 + + !> DLAG2Q: converts a SINGLE PRECISION matrix, SA, to a DOUBLE + !> PRECISION matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_qlag2q( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + real(qp), intent(in) :: sa(ldsa,*) + real(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_qlag2q + + + +end module stdlib_linalg_lapack_q +#:endif diff --git a/src/stdlib_linalg_lapack_s.fypp b/src/stdlib_linalg_lapack_s.fypp new file mode 100644 index 000000000..51897399a --- /dev/null +++ b/src/stdlib_linalg_lapack_s.fypp @@ -0,0 +1,84837 @@ +#:include "common.fypp" +module stdlib_linalg_lapack_s + use stdlib_linalg_constants + use stdlib_linalg_blas + use stdlib_linalg_lapack_aux + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_sbbcsd + public :: stdlib_sbdsdc + public :: stdlib_sbdsqr + public :: stdlib_scsum1 + public :: stdlib_sdisna + public :: stdlib_sgbbrd + public :: stdlib_sgbcon + public :: stdlib_sgbequ + public :: stdlib_sgbequb + public :: stdlib_sgbrfs + public :: stdlib_sgbsv + public :: stdlib_sgbsvx + public :: stdlib_sgbtf2 + public :: stdlib_sgbtrf + public :: stdlib_sgbtrs + public :: stdlib_sgebak + public :: stdlib_sgebal + public :: stdlib_sgebd2 + public :: stdlib_sgebrd + public :: stdlib_sgecon + public :: stdlib_sgeequ + public :: stdlib_sgeequb + public :: stdlib_sgees + public :: stdlib_sgeesx + public :: stdlib_sgeev + public :: stdlib_sgeevx + public :: stdlib_sgehd2 + public :: stdlib_sgehrd + public :: stdlib_sgejsv + public :: stdlib_sgelq + public :: stdlib_sgelq2 + public :: stdlib_sgelqf + public :: stdlib_sgelqt + public :: stdlib_sgelqt3 + public :: stdlib_sgels + public :: stdlib_sgelsd + public :: stdlib_sgelss + public :: stdlib_sgelsy + public :: stdlib_sgemlq + public :: stdlib_sgemlqt + public :: stdlib_sgemqr + public :: stdlib_sgemqrt + public :: stdlib_sgeql2 + public :: stdlib_sgeqlf + public :: stdlib_sgeqp3 + public :: stdlib_sgeqr + public :: stdlib_sgeqr2 + public :: stdlib_sgeqr2p + public :: stdlib_sgeqrf + public :: stdlib_sgeqrfp + public :: stdlib_sgeqrt + public :: stdlib_sgeqrt2 + public :: stdlib_sgeqrt3 + public :: stdlib_sgerfs + public :: stdlib_sgerq2 + public :: stdlib_sgerqf + public :: stdlib_sgesc2 + public :: stdlib_sgesdd + public :: stdlib_sgesv + public :: stdlib_sgesvd + public :: stdlib_sgesvdq + public :: stdlib_sgesvj + public :: stdlib_sgesvx + public :: stdlib_sgetc2 + public :: stdlib_sgetf2 + public :: stdlib_sgetrf + public :: stdlib_sgetrf2 + public :: stdlib_sgetri + public :: stdlib_sgetrs + public :: stdlib_sgetsls + public :: stdlib_sgetsqrhrt + public :: stdlib_sggbak + public :: stdlib_sggbal + public :: stdlib_sgges + public :: stdlib_sgges3 + public :: stdlib_sggesx + public :: stdlib_sggev + public :: stdlib_sggev3 + public :: stdlib_sggevx + public :: stdlib_sggglm + public :: stdlib_sgghd3 + public :: stdlib_sgghrd + public :: stdlib_sgglse + public :: stdlib_sggqrf + public :: stdlib_sggrqf + public :: stdlib_sgsvj0 + public :: stdlib_sgsvj1 + public :: stdlib_sgtcon + public :: stdlib_sgtrfs + public :: stdlib_sgtsv + public :: stdlib_sgtsvx + public :: stdlib_sgttrf + public :: stdlib_sgttrs + public :: stdlib_sgtts2 + public :: stdlib_shgeqz + public :: stdlib_shsein + public :: stdlib_shseqr + public :: stdlib_sisnan + public :: stdlib_sla_gbamv + public :: stdlib_sla_gbrcond + public :: stdlib_sla_gbrpvgrw + public :: stdlib_sla_geamv + public :: stdlib_sla_gercond + public :: stdlib_sla_gerpvgrw + public :: stdlib_sla_lin_berr + public :: stdlib_sla_porcond + public :: stdlib_sla_porpvgrw + public :: stdlib_sla_syamv + public :: stdlib_sla_syrcond + public :: stdlib_sla_syrpvgrw + public :: stdlib_sla_wwaddw + public :: stdlib_slabad + public :: stdlib_slabrd + public :: stdlib_slacn2 + public :: stdlib_slacon + public :: stdlib_slacpy + public :: stdlib_sladiv + public :: stdlib_sladiv1 + public :: stdlib_sladiv2 + public :: stdlib_slae2 + public :: stdlib_slaebz + public :: stdlib_slaed0 + public :: stdlib_slaed1 + public :: stdlib_slaed2 + public :: stdlib_slaed3 + public :: stdlib_slaed4 + public :: stdlib_slaed5 + public :: stdlib_slaed6 + public :: stdlib_slaed7 + public :: stdlib_slaed8 + public :: stdlib_slaed9 + public :: stdlib_slaeda + public :: stdlib_slaein + public :: stdlib_slaev2 + public :: stdlib_slaexc + public :: stdlib_slag2 + public :: stdlib_slag2d + public :: stdlib_slags2 + public :: stdlib_slagtf + public :: stdlib_slagtm + public :: stdlib_slagts + public :: stdlib_slagv2 + public :: stdlib_slahqr + public :: stdlib_slahr2 + public :: stdlib_slaic1 + public :: stdlib_slaisnan + public :: stdlib_slaln2 + public :: stdlib_slals0 + public :: stdlib_slalsa + public :: stdlib_slalsd + public :: stdlib_slamch + public :: stdlib_slamc3 + public :: stdlib_slamrg + public :: stdlib_slamswlq + public :: stdlib_slamtsqr + public :: stdlib_slaneg + public :: stdlib_slangb + public :: stdlib_slange + public :: stdlib_slangt + public :: stdlib_slanhs + public :: stdlib_slansb + public :: stdlib_slansf + public :: stdlib_slansp + public :: stdlib_slanst + public :: stdlib_slansy + public :: stdlib_slantb + public :: stdlib_slantp + public :: stdlib_slantr + public :: stdlib_slanv2 + public :: stdlib_slaorhr_col_getrfnp + public :: stdlib_slaorhr_col_getrfnp2 + public :: stdlib_slapll + public :: stdlib_slapmr + public :: stdlib_slapmt + public :: stdlib_slapy2 + public :: stdlib_slapy3 + public :: stdlib_slaqgb + public :: stdlib_slaqge + public :: stdlib_slaqp2 + public :: stdlib_slaqps + public :: stdlib_slaqr0 + public :: stdlib_slaqr1 + public :: stdlib_slaqr2 + public :: stdlib_slaqr3 + public :: stdlib_slaqr4 + public :: stdlib_slaqr5 + public :: stdlib_slaqsb + public :: stdlib_slaqsp + public :: stdlib_slaqsy + public :: stdlib_slaqtr + public :: stdlib_slaqz0 + public :: stdlib_slaqz1 + public :: stdlib_slaqz2 + public :: stdlib_slaqz3 + public :: stdlib_slaqz4 + public :: stdlib_slar1v + public :: stdlib_slar2v + public :: stdlib_slarf + public :: stdlib_slarfb + public :: stdlib_slarfb_gett + public :: stdlib_slarfg + public :: stdlib_slarfgp + public :: stdlib_slarft + public :: stdlib_slarfx + public :: stdlib_slarfy + public :: stdlib_slargv + public :: stdlib_slarnv + public :: stdlib_slarra + public :: stdlib_slarrb + public :: stdlib_slarrc + public :: stdlib_slarrd + public :: stdlib_slarre + public :: stdlib_slarrf + public :: stdlib_slarrj + public :: stdlib_slarrk + public :: stdlib_slarrr + public :: stdlib_slarrv + public :: stdlib_slartg + public :: stdlib_slartgp + public :: stdlib_slartgs + public :: stdlib_slartv + public :: stdlib_slaruv + public :: stdlib_slarz + public :: stdlib_slarzb + public :: stdlib_slarzt + public :: stdlib_slas2 + public :: stdlib_slascl + public :: stdlib_slasd0 + public :: stdlib_slasd1 + public :: stdlib_slasd2 + public :: stdlib_slasd3 + public :: stdlib_slasd4 + public :: stdlib_slasd5 + public :: stdlib_slasd6 + public :: stdlib_slasd7 + public :: stdlib_slasd8 + public :: stdlib_slasda + public :: stdlib_slasdq + public :: stdlib_slasdt + public :: stdlib_slaset + public :: stdlib_slasq1 + public :: stdlib_slasq2 + public :: stdlib_slasq3 + public :: stdlib_slasq4 + public :: stdlib_slasq5 + public :: stdlib_slasq6 + public :: stdlib_slasr + public :: stdlib_slasrt + public :: stdlib_slassq + public :: stdlib_slasv2 + public :: stdlib_slaswlq + public :: stdlib_slaswp + public :: stdlib_slasy2 + public :: stdlib_slasyf + public :: stdlib_slasyf_aa + public :: stdlib_slasyf_rk + public :: stdlib_slasyf_rook + public :: stdlib_slatbs + public :: stdlib_slatdf + public :: stdlib_slatps + public :: stdlib_slatrd + public :: stdlib_slatrs + public :: stdlib_slatrz + public :: stdlib_slatsqr + public :: stdlib_slauu2 + public :: stdlib_slauum + public :: stdlib_sopgtr + public :: stdlib_sopmtr + public :: stdlib_sorbdb + public :: stdlib_sorbdb1 + public :: stdlib_sorbdb2 + public :: stdlib_sorbdb3 + public :: stdlib_sorbdb4 + public :: stdlib_sorbdb5 + public :: stdlib_sorbdb6 + public :: stdlib_sorcsd + public :: stdlib_sorcsd2by1 + public :: stdlib_sorg2l + public :: stdlib_sorg2r + public :: stdlib_sorgbr + public :: stdlib_sorghr + public :: stdlib_sorgl2 + public :: stdlib_sorglq + public :: stdlib_sorgql + public :: stdlib_sorgqr + public :: stdlib_sorgr2 + public :: stdlib_sorgrq + public :: stdlib_sorgtr + public :: stdlib_sorgtsqr + public :: stdlib_sorgtsqr_row + public :: stdlib_sorhr_col + public :: stdlib_sorm22 + public :: stdlib_sorm2l + public :: stdlib_sorm2r + public :: stdlib_sormbr + public :: stdlib_sormhr + public :: stdlib_sorml2 + public :: stdlib_sormlq + public :: stdlib_sormql + public :: stdlib_sormqr + public :: stdlib_sormr2 + public :: stdlib_sormr3 + public :: stdlib_sormrq + public :: stdlib_sormrz + public :: stdlib_sormtr + public :: stdlib_spbcon + public :: stdlib_spbequ + public :: stdlib_spbrfs + public :: stdlib_spbstf + public :: stdlib_spbsv + public :: stdlib_spbsvx + public :: stdlib_spbtf2 + public :: stdlib_spbtrf + public :: stdlib_spbtrs + public :: stdlib_spftrf + public :: stdlib_spftri + public :: stdlib_spftrs + public :: stdlib_spocon + public :: stdlib_spoequ + public :: stdlib_spoequb + public :: stdlib_sporfs + public :: stdlib_sposv + public :: stdlib_sposvx + public :: stdlib_spotf2 + public :: stdlib_spotrf + public :: stdlib_spotrf2 + public :: stdlib_spotri + public :: stdlib_spotrs + public :: stdlib_sppcon + public :: stdlib_sppequ + public :: stdlib_spprfs + public :: stdlib_sppsv + public :: stdlib_sppsvx + public :: stdlib_spptrf + public :: stdlib_spptri + public :: stdlib_spptrs + public :: stdlib_spstf2 + public :: stdlib_spstrf + public :: stdlib_sptcon + public :: stdlib_spteqr + public :: stdlib_sptrfs + public :: stdlib_sptsv + public :: stdlib_sptsvx + public :: stdlib_spttrf + public :: stdlib_spttrs + public :: stdlib_sptts2 + public :: stdlib_srscl + public :: stdlib_ssb2st_kernels + public :: stdlib_ssbev + public :: stdlib_ssbevd + public :: stdlib_ssbevx + public :: stdlib_ssbgst + public :: stdlib_ssbgv + public :: stdlib_ssbgvd + public :: stdlib_ssbgvx + public :: stdlib_ssbtrd + public :: stdlib_ssfrk + public :: stdlib_sspcon + public :: stdlib_sspev + public :: stdlib_sspevd + public :: stdlib_sspevx + public :: stdlib_sspgst + public :: stdlib_sspgv + public :: stdlib_sspgvd + public :: stdlib_sspgvx + public :: stdlib_ssprfs + public :: stdlib_sspsv + public :: stdlib_sspsvx + public :: stdlib_ssptrd + public :: stdlib_ssptrf + public :: stdlib_ssptri + public :: stdlib_ssptrs + public :: stdlib_sstebz + public :: stdlib_sstedc + public :: stdlib_sstegr + public :: stdlib_sstein + public :: stdlib_sstemr + public :: stdlib_ssteqr + public :: stdlib_ssterf + public :: stdlib_sstev + public :: stdlib_sstevd + public :: stdlib_sstevr + public :: stdlib_sstevx + public :: stdlib_ssycon + public :: stdlib_ssycon_rook + public :: stdlib_ssyconv + public :: stdlib_ssyconvf + public :: stdlib_ssyconvf_rook + public :: stdlib_ssyequb + public :: stdlib_ssyev + public :: stdlib_ssyevd + public :: stdlib_ssyevr + public :: stdlib_ssyevx + public :: stdlib_ssygs2 + public :: stdlib_ssygst + public :: stdlib_ssygv + public :: stdlib_ssygvd + public :: stdlib_ssygvx + public :: stdlib_ssyrfs + public :: stdlib_ssysv + public :: stdlib_ssysv_aa + public :: stdlib_ssysv_rk + public :: stdlib_ssysv_rook + public :: stdlib_ssysvx + public :: stdlib_ssyswapr + public :: stdlib_ssytd2 + public :: stdlib_ssytf2 + public :: stdlib_ssytf2_rk + public :: stdlib_ssytf2_rook + public :: stdlib_ssytrd + public :: stdlib_ssytrd_sb2st + public :: stdlib_ssytrd_sy2sb + public :: stdlib_ssytrf + public :: stdlib_ssytrf_aa + public :: stdlib_ssytrf_rk + public :: stdlib_ssytrf_rook + public :: stdlib_ssytri + public :: stdlib_ssytri_rook + public :: stdlib_ssytrs + public :: stdlib_ssytrs2 + public :: stdlib_ssytrs_3 + public :: stdlib_ssytrs_aa + public :: stdlib_ssytrs_rook + public :: stdlib_stbcon + public :: stdlib_stbrfs + public :: stdlib_stbtrs + public :: stdlib_stfsm + public :: stdlib_stftri + public :: stdlib_stfttp + public :: stdlib_stfttr + public :: stdlib_stgevc + public :: stdlib_stgex2 + public :: stdlib_stgexc + public :: stdlib_stgsen + public :: stdlib_stgsja + public :: stdlib_stgsna + public :: stdlib_stgsy2 + public :: stdlib_stgsyl + public :: stdlib_stpcon + public :: stdlib_stplqt + public :: stdlib_stplqt2 + public :: stdlib_stpmlqt + public :: stdlib_stpmqrt + public :: stdlib_stpqrt + public :: stdlib_stpqrt2 + public :: stdlib_stprfb + public :: stdlib_stprfs + public :: stdlib_stptri + public :: stdlib_stptrs + public :: stdlib_stpttf + public :: stdlib_stpttr + public :: stdlib_strcon + public :: stdlib_strevc + public :: stdlib_strevc3 + public :: stdlib_strexc + public :: stdlib_strrfs + public :: stdlib_strsen + public :: stdlib_strsna + public :: stdlib_strsyl + public :: stdlib_strti2 + public :: stdlib_strtri + public :: stdlib_strtrs + public :: stdlib_strttf + public :: stdlib_strttp + public :: stdlib_stzrzf + + ! 32-bit real constants + real(sp), parameter, private :: negone = -1.00_sp + real(sp), parameter, private :: zero = 0.00_sp + real(sp), parameter, private :: half = 0.50_sp + real(sp), parameter, private :: one = 1.00_sp + real(sp), parameter, private :: two = 2.00_sp + real(sp), parameter, private :: three = 3.00_sp + real(sp), parameter, private :: four = 4.00_sp + real(sp), parameter, private :: eight = 8.00_sp + real(sp), parameter, private :: ten = 10.00_sp + + ! 32-bit complex constants + complex(sp), parameter, private :: czero = ( 0.0_sp,0.0_sp) + complex(sp), parameter, private :: chalf = ( 0.5_sp,0.0_sp) + complex(sp), parameter, private :: cone = ( 1.0_sp,0.0_sp) + complex(sp), parameter, private :: cnegone = (-1.0_sp,0.0_sp) + + ! 32-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(sp), parameter, private :: rradix = real(radix(zero),sp) + real(sp), parameter, private :: ulp = epsilon(zero) + real(sp), parameter, private :: eps = ulp*half + real(sp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(sp), parameter, private :: safmax = one/safmin + real(sp), parameter, private :: smlnum = safmin/ulp + real(sp), parameter, private :: bignum = safmax*ulp + real(sp), parameter, private :: rtmin = sqrt(smlnum) + real(sp), parameter, private :: rtmax = sqrt(bignum) + + ! 32-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(sp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(sp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(sp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(sp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> SCSUM1: takes the sum of the absolute values of a complex + !> vector and returns a single precision result. + !> Based on SCASUM from the Level 1 BLAS. + !> The change is to use the 'genuine' absolute value. + + pure real(sp) function stdlib_scsum1( n, cx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(sp), intent(in) :: cx(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, nincx + real(sp) :: stemp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + stdlib_scsum1 = zero + stemp = zero + if( n<=0 )return + if( incx==1 )go to 20 + ! code for increment not equal to 1 + nincx = n*incx + do i = 1, nincx, incx + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_scsum1 = stemp + return + ! code for increment equal to 1 + 20 continue + do i = 1, n + ! next line modified. + stemp = stemp + abs( cx( i ) ) + end do + stdlib_scsum1 = stemp + return + end function stdlib_scsum1 + + !> SGBTF2: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_sgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_sscal( km, one / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_sger( km, ju-j, -one, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_sgbtf2 + + !> SGBTRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general band matrix A using the LU factorization computed + !> by SGBTRF. + + pure subroutine stdlib_sgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-one modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_sswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_sger( lm, nrhs, -one, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, 1 )& + , ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_stbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else + ! solve a**t*x = b. + do i = 1, nrhs + ! solve u**t*x = b, overwriting b with x. + call stdlib_stbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t*x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_sgemv( 'TRANSPOSE', lm, nrhs, -one, b( j+1, 1 ),ldb, ab( kd+1, j )& + , 1, one, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_sswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_sgbtrs + + !> SGEBAK: forms the right or left eigenvectors of a real general matrix + !> by backward transformation on the computed eigenvectors of the + !> balanced matrix output by SGEBAL. + + pure subroutine stdlib_sgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: v(ldv,*) + real(sp), intent(in) :: scale(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(sp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i SGGBAK: forms the right or left eigenvectors of a real generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> SGGBAL. + + pure subroutine stdlib_sggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: lscale(*), rscale(*) + real(sp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv SGTSV: solves the equation + !> A*X = B, + !> where A is an n by n tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T*X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_sgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end do loop_10 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + b( i+1, 1 ) = b( i+1, 1 ) - fact*b( i, 1 ) + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + temp = b( i, 1 ) + b( i, 1 ) = b( i+1, 1 ) + b( i+1, 1 ) = temp - fact*b( i+1, 1 ) + end if + end if + if( d( n )==zero ) then + info = n + return + end if + else + loop_40: do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + dl( i ) = zero + else + ! interchange rows i and i+1 + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + dl( i ) = du( i+1 ) + du( i+1 ) = -fact*dl( i ) + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end do loop_40 + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + d( i+1 ) = d( i+1 ) - fact*du( i ) + do j = 1, nrhs + b( i+1, j ) = b( i+1, j ) - fact*b( i, j ) + end do + else + info = i + return + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + temp = d( i+1 ) + d( i+1 ) = du( i ) - fact*temp + du( i ) = temp + do j = 1, nrhs + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - fact*b( i+1, j ) + end do + end if + end if + if( d( n )==zero ) then + info = n + return + end if + end if + ! back solve with the matrix u from the factorization. + if( nrhs<=2 ) then + j = 1 + 70 continue + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-dl( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + return + end subroutine stdlib_sgtsv + + !> SGTTRF: computes an LU factorization of a real tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_sgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: d(*), dl(*), du(*) + real(sp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: fact, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'SGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( abs( d( i ) )>=abs( dl( i ) ) ) then + if( d( i )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( d( i )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_sgttrf + + !> SGTTS2: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by SGTTRF. + + pure subroutine stdlib_sgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ip, j + real(sp) :: temp + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + ip = ipiv( i ) + temp = b( i+1-ip+i, j ) - dl( i )*b( ip, j ) + b( i, j ) = b( ip, j ) + b( i+1, j ) = temp + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else + ! solve a**t * x = b. + if( nrhs<=1 ) then + ! solve u**t*x = b. + j = 1 + 70 continue + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t*x = b. + do i = n - 1, 1, -1 + ip = ipiv( i ) + temp = b( i, j ) - dl( i )*b( i+1, j ) + b( i, j ) = b( ip, j ) + b( ip, j ) = temp + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_sgtts2 + + !> SLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(sp) function stdlib_sla_gbrpvgrw( n, kl, ku, ncols, ab, ldab, afb,ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( abs( ab( kd+i-j, j)), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( abs( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_sla_gbrpvgrw = rpvgrw + end function stdlib_sla_gbrpvgrw + + !> SLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(sp) function stdlib_sla_gerpvgrw( n, ncols, a, lda, af, ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + real(sp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: amax, umax, rpvgrw + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( abs( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( abs( af( i, j ) ), umax ) + end do + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_sla_gerpvgrw = rpvgrw + end function stdlib_sla_gerpvgrw + + !> SLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_sla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: x(*), y(*) + real(sp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + real(sp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_sla_wwaddw + + !> SLABAD: takes as input the values computed by SLAMCH for underflow and + !> overflow, and returns the square root of each of these values if the + !> log of LARGE is sufficiently large. This subroutine is intended to + !> identify machines with a large exponent range, such as the Crays, and + !> redefine the underflow and overflow limits to be the square roots of + !> the values computed by SLAMCH. This subroutine is needed because + !> SLAMCH does not compensate for poor arithmetic in the upper half of + !> the exponent range, as is found on a Cray. + + pure subroutine stdlib_slabad( small, large ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(inout) :: large, small + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: log10,sqrt + ! Executable Statements + ! if it looks like we're on a cray, take the square root of + ! small and large to avoid overflow and underflow problems. + if( log10( large )>2000. ) then + small = sqrt( small ) + large = sqrt( large ) + end if + return + end subroutine stdlib_slabad + + !> SLACN2: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_slacn2( n, v, x, isgn, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + integer(ilp), intent(inout) :: isave(3) + real(sp), intent(out) :: v(*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, jlast + real(sp) :: altsgn, estold, temp, xs + ! Intrinsic Functions + intrinsic :: abs,nint,real + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=sp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_sasum( n, x, 1 ) + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + isave( 2 ) = stdlib_isamax( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( isave( 2 ) ) = one + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_scopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_sasum( n, v, 1 ) + do i = 1, n + if( x(i)>=zero ) then + xs = one + else + xs = -one + end if + if( nint( xs,KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + if( x(i)>=zero ) then + x(i) = one + else + x(i) = -one + end if + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_isamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_scopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_slacn2 + + !> SLACON: estimates the 1-norm of a square, real matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_slacon( n, v, x, isgn, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(sp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(out) :: isgn(*) + real(sp), intent(out) :: v(*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(sp) :: altsgn, estold, temp + ! Intrinsic Functions + intrinsic :: abs,nint,real,sign + ! Save Statement + save + ! Executable Statements + if( kase==0 ) then + do i = 1, n + x( i ) = one / real( n,KIND=sp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 110, 140 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 150 + end if + est = stdlib_sasum( n, x, 1 ) + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by transpose(a)*x. + 40 continue + j = stdlib_isamax( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = zero + end do + x( j ) = one + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_scopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_sasum( n, v, 1 ) + do i = 1, n + if( nint( sign( one, x( i ) ),KIND=ilp)/=isgn( i ) )go to 90 + end do + ! repeated sign vector detected, hence algorithm has converged. + go to 120 + 90 continue + ! test for cycling. + if( est<=estold )go to 120 + do i = 1, n + x( i ) = sign( one, x( i ) ) + isgn( i ) = nint( x( i ),KIND=ilp) + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by transpose(a)*x. + 110 continue + jlast = j + j = stdlib_isamax( n, x, 1 ) + if( ( x( jlast )/=abs( x( j ) ) ) .and. ( iterest ) then + call stdlib_scopy( n, x, 1, v, 1 ) + est = temp + end if + 150 continue + kase = 0 + return + end subroutine stdlib_slacon + + !> SLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_slacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_slacpy + + + pure real(sp) function stdlib_sladiv2( a, b, c, d, r, t ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: a, b, c, d, r, t + ! ===================================================================== + + ! Local Scalars + real(sp) :: br + ! Executable Statements + if( r/=zero ) then + br = b * r + if( br/=zero ) then + stdlib_sladiv2 = (a + br) * t + else + stdlib_sladiv2 = a * t + (b * t) * r + end if + else + stdlib_sladiv2 = (a + d * (b / c)) * t + end if + return + end function stdlib_sladiv2 + + !> SLAE2: computes the eigenvalues of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, and RT2 + !> is the eigenvalue of smaller absolute value. + + pure subroutine stdlib_slae2( a, b, c, rt1, rt2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: a, b, c + real(sp), intent(out) :: rt1, rt2 + ! ===================================================================== + + + + + ! Local Scalars + real(sp) :: ab, acmn, acmx, adf, df, rt, sm, tb + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + end if + return + end subroutine stdlib_slae2 + + !> SLAEBZ: contains the iteration loops which compute and use the + !> function N(w), which is the count of eigenvalues of a symmetric + !> tridiagonal matrix T less than or equal to its argument w. It + !> performs a choice of two types of loops: + !> IJOB=1, followed by + !> IJOB=2: It takes as input a list of intervals and returns a list of + !> sufficiently small intervals whose union contains the same + !> eigenvalues as the union of the original intervals. + !> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. + !> The output interval (AB(j,1),AB(j,2)] will contain + !> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. + !> IJOB=3: It performs a binary search in each input interval + !> (AB(j,1),AB(j,2)] for a point w(j) such that + !> N(w(j))=NVAL(j), and uses C(j) as the starting point of + !> the search. If such a w(j) is found, then on output + !> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output + !> (AB(j,1),AB(j,2)] will be a small interval containing the + !> point where N(w) jumps through NVAL(j), unless that point + !> lies outside the initial interval. + !> Note that the intervals are in all cases half-open intervals, + !> i.e., of the form (a,b] , which includes b but not a . + !> To avoid underflow, the matrix should be scaled so that its largest + !> element is no greater than overflow**(1/2) * underflow**(1/4) + !> in absolute value. To assure the most accurate computation + !> of small eigenvalues, the matrix should be scaled to be + !> not much smaller than that, either. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966 + !> Note: the arguments are, in general, *not* checked for unreasonable + !> values. + + pure subroutine stdlib_slaebz( ijob, nitmax, n, mmax, minp, nbmin, abstol,reltol, pivmin, d, & + e, e2, nval, ab, c, mout,nab, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, minp, mmax, n, nbmin, nitmax + integer(ilp), intent(out) :: info, mout + real(sp), intent(in) :: abstol, pivmin, reltol + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(inout) :: nab(mmax,*), nval(*) + real(sp), intent(inout) :: ab(mmax,*), c(*) + real(sp), intent(in) :: d(*), e(*), e2(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itmp1, itmp2, j, ji, jit, jp, kf, kfnew, kl, klnew + real(sp) :: tmp1, tmp2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! check for errors + info = 0 + if( ijob<1 .or. ijob>3 ) then + info = -1 + return + end if + ! initialize nab + if( ijob==1 ) then + ! compute the number of eigenvalues in the initial intervals. + mout = 0 + do ji = 1, minp + do jp = 1, 2 + tmp1 = d( 1 ) - ab( ji, jp ) + if( abs( tmp1 )=nbmin .and. nbmin>0 ) then + ! begin of parallel version of the loop + do ji = kf, kl + ! compute n(c), the number of eigenvalues less than c + work( ji ) = d( 1 ) - c( ji ) + iwork( ji ) = 0 + if( work( ji )<=pivmin ) then + iwork( ji ) = 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + do j = 2, n + work( ji ) = d( j ) - e2( j-1 ) / work( ji ) - c( ji ) + if( work( ji )<=pivmin ) then + iwork( ji ) = iwork( ji ) + 1 + work( ji ) = min( work( ji ), -pivmin ) + end if + end do + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + klnew = kl + loop_70: do ji = kf, kl + ! insure that n(w) is monotone + iwork( ji ) = min( nab( ji, 2 ),max( nab( ji, 1 ), iwork( ji ) ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( iwork( ji )==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = c( ji ) + else if( iwork( ji )==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = c( ji ) + else + klnew = klnew + 1 + if( klnew<=mmax ) then + ! eigenvalue in both intervals -- add upper to + ! queue. + ab( klnew, 2 ) = ab( ji, 2 ) + nab( klnew, 2 ) = nab( ji, 2 ) + ab( klnew, 1 ) = c( ji ) + nab( klnew, 1 ) = iwork( ji ) + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + else + info = mmax + 1 + end if + end if + end do loop_70 + if( info/=0 )return + kl = klnew + else + ! ijob=3: binary search. keep only the interval containing + ! w s.t. n(w) = nval + do ji = kf, kl + if( iwork( ji )<=nval( ji ) ) then + ab( ji, 1 ) = c( ji ) + nab( ji, 1 ) = iwork( ji ) + end if + if( iwork( ji )>=nval( ji ) ) then + ab( ji, 2 ) = c( ji ) + nab( ji, 2 ) = iwork( ji ) + end if + end do + end if + else + ! end of parallel version of the loop + ! begin of serial version of the loop + klnew = kl + loop_100: do ji = kf, kl + ! compute n(w), the number of eigenvalues less than w + tmp1 = c( ji ) + tmp2 = d( 1 ) - tmp1 + itmp1 = 0 + if( tmp2<=pivmin ) then + itmp1 = 1 + tmp2 = min( tmp2, -pivmin ) + end if + do j = 2, n + tmp2 = d( j ) - e2( j-1 ) / tmp2 - tmp1 + if( tmp2<=pivmin ) then + itmp1 = itmp1 + 1 + tmp2 = min( tmp2, -pivmin ) + end if + end do + if( ijob<=2 ) then + ! ijob=2: choose all intervals containing eigenvalues. + ! insure that n(w) is monotone + itmp1 = min( nab( ji, 2 ),max( nab( ji, 1 ), itmp1 ) ) + ! update the queue -- add intervals if both halves + ! contain eigenvalues. + if( itmp1==nab( ji, 2 ) ) then + ! no eigenvalue in the upper interval: + ! just use the lower interval. + ab( ji, 2 ) = tmp1 + else if( itmp1==nab( ji, 1 ) ) then + ! no eigenvalue in the lower interval: + ! just use the upper interval. + ab( ji, 1 ) = tmp1 + else if( klnew=nval( ji ) ) then + ab( ji, 2 ) = tmp1 + nab( ji, 2 ) = itmp1 + end if + end if + end do loop_100 + kl = klnew + end if + ! check for convergence + kfnew = kf + loop_110: do ji = kf, kl + tmp1 = abs( ab( ji, 2 )-ab( ji, 1 ) ) + tmp2 = max( abs( ab( ji, 2 ) ), abs( ab( ji, 1 ) ) ) + if( tmp1=nab( ji, 2 ) ) & + then + ! converged -- swap with position kfnew, + ! then increment kfnew + if( ji>kfnew ) then + tmp1 = ab( ji, 1 ) + tmp2 = ab( ji, 2 ) + itmp1 = nab( ji, 1 ) + itmp2 = nab( ji, 2 ) + ab( ji, 1 ) = ab( kfnew, 1 ) + ab( ji, 2 ) = ab( kfnew, 2 ) + nab( ji, 1 ) = nab( kfnew, 1 ) + nab( ji, 2 ) = nab( kfnew, 2 ) + ab( kfnew, 1 ) = tmp1 + ab( kfnew, 2 ) = tmp2 + nab( kfnew, 1 ) = itmp1 + nab( kfnew, 2 ) = itmp2 + if( ijob==3 ) then + itmp1 = nval( ji ) + nval( ji ) = nval( kfnew ) + nval( kfnew ) = itmp1 + end if + end if + kfnew = kfnew + 1 + end if + end do loop_110 + kf = kfnew + ! choose midpoints + do ji = kf, kl + c( ji ) = half*( ab( ji, 1 )+ab( ji, 2 ) ) + end do + ! if no more intervals to refine, quit. + if( kf>kl )go to 140 + end do loop_130 + ! converged + 140 continue + info = max( kl+1-kf, 0 ) + mout = kl + return + end subroutine stdlib_slaebz + + !> This subroutine computes the I-th eigenvalue of a symmetric rank-one + !> modification of a 2-by-2 diagonal matrix + !> diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal elements in the array D are assumed to satisfy + !> D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_slaed5( i, d, z, delta, rho, dlam ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(sp), intent(out) :: dlam + real(sp), intent(in) :: rho + ! Array Arguments + real(sp), intent(in) :: d(2), z(2) + real(sp), intent(out) :: delta(2) + ! ===================================================================== + + ! Local Scalars + real(sp) :: b, c, del, tau, temp, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + if( i==1 ) then + w = one + two*rho*( z( 2 )*z( 2 )-z( 1 )*z( 1 ) ) / del + if( w>zero ) then + b = del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*del + ! b > zero, always + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + dlam = d( 1 ) + tau + delta( 1 ) = -z( 1 ) / tau + delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + end if + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -del + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*del + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + dlam = d( 2 ) + tau + delta( 1 ) = -z( 1 ) / ( del+tau ) + delta( 2 ) = -z( 2 ) / tau + temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + delta( 1 ) = delta( 1 ) / temp + delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_slaed5 + + !> SLAEDA: computes the Z vector corresponding to the merge step in the + !> CURLVLth step of the merge process with TLVLS steps for the CURPBMth + !> problem. + + pure subroutine stdlib_slaeda( n, tlvls, curlvl, curpbm, prmptr, perm, givptr,givcol, givnum,& + q, qptr, z, ztemp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, n, tlvls + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + real(sp), intent(in) :: givnum(2,*), q(*) + real(sp), intent(out) :: z(*), ztemp(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bsiz1, bsiz2, curr, i, k, mid, psiz1, psiz2, ptr, zptr1 + ! Intrinsic Functions + intrinsic :: int,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLAEDA', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine location of first number in second half. + mid = n / 2 + 1 + ! gather last/first rows of appropriate eigenblocks into center of z + ptr = 1 + ! determine location of lowest level subproblem in the full storage + ! scheme + curr = ptr + curpbm*2**curlvl + 2**( curlvl-1 ) - 1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these square + ! roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=sp) ),KIND=ilp) + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=sp) ),KIND=ilp) + + do k = 1, mid - bsiz1 - 1 + z( k ) = zero + end do + call stdlib_scopy( bsiz1, q( qptr( curr )+bsiz1-1 ), bsiz1,z( mid-bsiz1 ), 1 ) + call stdlib_scopy( bsiz2, q( qptr( curr+1 ) ), bsiz2, z( mid ), 1 ) + do k = mid + bsiz2, n + z( k ) = zero + end do + ! loop through remaining levels 1 -> curlvl applying the givens + ! rotations and permutation and then multiplying the center matrices + ! against the current z. + ptr = 2**tlvls + 1 + loop_70: do k = 1, curlvl - 1 + curr = ptr + curpbm*2**( curlvl-k ) + 2**( curlvl-k-1 ) - 1 + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + zptr1 = mid - psiz1 + ! apply givens at curr and curr+1 + do i = givptr( curr ), givptr( curr+1 ) - 1 + call stdlib_srot( 1, z( zptr1+givcol( 1, i )-1 ), 1,z( zptr1+givcol( 2, i )-1 ), & + 1, givnum( 1, i ),givnum( 2, i ) ) + end do + do i = givptr( curr+1 ), givptr( curr+2 ) - 1 + call stdlib_srot( 1, z( mid-1+givcol( 1, i ) ), 1,z( mid-1+givcol( 2, i ) ), 1, & + givnum( 1, i ),givnum( 2, i ) ) + end do + psiz1 = prmptr( curr+1 ) - prmptr( curr ) + psiz2 = prmptr( curr+2 ) - prmptr( curr+1 ) + do i = 0, psiz1 - 1 + ztemp( i+1 ) = z( zptr1+perm( prmptr( curr )+i )-1 ) + end do + do i = 0, psiz2 - 1 + ztemp( psiz1+i+1 ) = z( mid+perm( prmptr( curr+1 )+i )-1 ) + end do + ! multiply blocks at curr and curr+1 + ! determine size of these matrices. we add half to the value of + ! the sqrt in case the machine underestimates one of these + ! square roots. + bsiz1 = int( half+sqrt( real( qptr( curr+1 )-qptr( curr ),KIND=sp) ),KIND=ilp) + + bsiz2 = int( half+sqrt( real( qptr( curr+2 )-qptr( curr+1 ),KIND=sp) ),KIND=ilp) + + if( bsiz1>0 ) then + call stdlib_sgemv( 'T', bsiz1, bsiz1, one, q( qptr( curr ) ),bsiz1, ztemp( 1 ), & + 1, zero, z( zptr1 ), 1 ) + end if + call stdlib_scopy( psiz1-bsiz1, ztemp( bsiz1+1 ), 1, z( zptr1+bsiz1 ),1 ) + if( bsiz2>0 ) then + call stdlib_sgemv( 'T', bsiz2, bsiz2, one, q( qptr( curr+1 ) ),bsiz2, ztemp( & + psiz1+1 ), 1, zero, z( mid ), 1 ) + end if + call stdlib_scopy( psiz2-bsiz2, ztemp( psiz1+bsiz2+1 ), 1,z( mid+bsiz2 ), 1 ) + + ptr = ptr + 2**( tlvls-k ) + end do loop_70 + return + end subroutine stdlib_slaeda + + !> SLAEV2: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> [ A B ] + !> [ B C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_slaev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: a, b, c + real(sp), intent(out) :: cs1, rt1, rt2, sn1 + ! ===================================================================== + + + + + ! Local Scalars + integer(ilp) :: sgn1, sgn2 + real(sp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ! compute the eigenvalues + sm = a + c + df = a - c + adf = abs( df ) + tb = b + b + ab = abs( tb ) + if( abs( a )>abs( c ) ) then + acmx = a + acmn = c + else + acmx = c + acmn = a + end if + if( adf>ab ) then + rt = adf*sqrt( one+( ab / adf )**2 ) + else if( adfzero ) then + rt1 = half*( sm+rt ) + sgn1 = 1 + ! order of execution important. + ! to get fully accurate smaller eigenvalue, + ! next line needs to be executed in higher precision. + rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b + else + ! includes case rt1 = rt2 = 0 + rt1 = half*rt + rt2 = -half*rt + sgn1 = 1 + end if + ! compute the eigenvector + if( df>=zero ) then + cs = df + rt + sgn2 = 1 + else + cs = df - rt + sgn2 = -1 + end if + acs = abs( cs ) + if( acs>ab ) then + ct = -tb / cs + sn1 = one / sqrt( one+ct*ct ) + cs1 = ct*sn1 + else + if( ab==zero ) then + cs1 = one + sn1 = zero + else + tn = -cs / tb + cs1 = one / sqrt( one+tn*tn ) + sn1 = tn*cs1 + end if + end if + if( sgn1==sgn2 ) then + tn = cs1 + cs1 = -sn1 + sn1 = tn + end if + return + end subroutine stdlib_slaev2 + + !> SLAG2: computes the eigenvalues of a 2 x 2 generalized eigenvalue + !> problem A - w B, with scaling as necessary to avoid over-/underflow. + !> The scaling factor "s" results in a modified eigenvalue equation + !> s A - w B + !> where s is a non-negative scaling factor chosen so that w, w B, + !> and s A do not overflow and, if possible, do not underflow, either. + + pure subroutine stdlib_slag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(sp), intent(in) :: safmin + real(sp), intent(out) :: scale1, scale2, wi, wr1, wr2 + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: fuzzy1 = one+1.0e-5_sp + + + + ! Local Scalars + real(sp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, & + binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,& + rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, & + wsmall + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + rtmin = sqrt( safmin ) + rtmax = one / rtmin + safmax = one / safmin + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a11 = ascale*a( 1, 1 ) + a21 = ascale*a( 2, 1 ) + a12 = ascale*a( 1, 2 ) + a22 = ascale*a( 2, 2 ) + ! perturb b if necessary to insure non-singularity + b11 = b( 1, 1 ) + b12 = b( 1, 2 ) + b22 = b( 2, 2 ) + bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin ) + if( abs( b11 )=one ) then + discr = ( rtmin*pp )**2 + qq*safmin + r = sqrt( abs( discr ) )*rtmax + else + if( pp**2+abs( qq )<=safmin ) then + discr = ( rtmax*pp )**2 + qq*safmax + r = sqrt( abs( discr ) )*rtmin + else + discr = pp**2 + qq + r = sqrt( abs( discr ) ) + end if + end if + ! note: the test of r in the following if is to cover the case when + ! discr is small and negative and is flushed to zero during + ! the calculation of r. on machines which have a consistent + ! flush-to-zero threshold and handle numbers above that + ! threshold correctly, it would not be necessary. + if( discr>=zero .or. r==zero ) then + sum = pp + sign( r, pp ) + diff = pp - sign( r, pp ) + wbig = shift + sum + ! compute smaller eigenvalue + wsmall = shift + diff + if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then + wdet = ( a11*a22-a12*a21 )*( binv11*binv22 ) + wsmall = wdet / wbig + end if + ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1) + ! for wr1. + if( pp>abi22 ) then + wr1 = min( wbig, wsmall ) + wr2 = max( wbig, wsmall ) + else + wr1 = max( wbig, wsmall ) + wr2 = min( wbig, wsmall ) + end if + wi = zero + else + ! complex eigenvalues + wr1 = shift + pp + wr2 = wr1 + wi = r + end if + ! further scaling to avoid underflow and overflow in computing + ! scale1 and overflow in computing w*b. + ! this scale factor (wscale) is bounded from above using c1 and c2, + ! and from below using c3 and c4. + ! c1 implements the condition s a must never overflow. + ! c2 implements the condition w b must never overflow. + ! c3, with c2, + ! implement the condition that s a - w b must never overflow. + ! c4 implements the condition s should not underflow. + ! c5 implements the condition max(s,|w|) should be at least 2. + c1 = bsize*( safmin*max( one, ascale ) ) + c2 = safmin*max( one, bnorm ) + c3 = bsize*safmin + if( ascale<=one .and. bsize<=one ) then + c4 = min( one, ( ascale / safmin )*bsize ) + else + c4 = one + end if + if( ascale<=one .or. bsize<=one ) then + c5 = min( one, ascale*bsize ) + else + c5 = one + end if + ! scale first eigenvalue + wabs = abs( wr1 ) + abs( wi ) + wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) ) + + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr1 = wr1*wscale + if( wi/=zero ) then + wi = wi*wscale + wr2 = wr1 + scale2 = scale1 + end if + else + scale1 = ascale*bsize + scale2 = scale1 + end if + ! scale second eigenvalue (if real) + if( wi==zero ) then + wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), & + c5 ) ) ) + if( wsize/=one ) then + wscale = one / wsize + if( wsize>one ) then + scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize ) + else + scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize ) + end if + wr2 = wr2*wscale + else + scale2 = ascale*bsize + end if + end if + return + end subroutine stdlib_slag2 + + !> SLAG2D: converts a SINGLE PRECISION matrix, SA, to a DOUBLE + !> PRECISION matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_slag2d( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + real(sp), intent(in) :: sa(ldsa,*) + real(dp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_slag2d + + !> SLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_slagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(in) :: alpha, beta + ! Array Arguments + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b + a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else + ! compute b := b - a**t*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_slagtm + + !> This routine is not for general use. It exists solely to avoid + !> over-optimization in SISNAN. + !> SLAISNAN: checks for NaNs by comparing its two arguments for + !> inequality. NaN is the only floating-point value where NaN != NaN + !> returns .TRUE. To check for NaNs, pass the same variable as both + !> arguments. + !> A compiler must assume that the two arguments are + !> not the same variable, and the test will not be optimized away. + !> Interprocedural or whole-program optimization may delete this + !> test. The ISNAN functions will be replaced by the correct + !> Fortran 03 intrinsic once the intrinsic is widely available. + + pure logical(lk) function stdlib_slaisnan( sin1, sin2 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sin1, sin2 + ! ===================================================================== + ! Executable Statements + stdlib_slaisnan = (sin1/=sin2) + return + end function stdlib_slaisnan + + !> SLAMCH: determines single precision machine parameters. + + pure real(sp) function stdlib_slamch( cmach ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: cmach + ! ===================================================================== + + ! Local Scalars + real(sp) :: rnd, eps, sfmin, small, rmach + ! Intrinsic Functions + intrinsic :: digits,epsilon,huge,maxexponent,minexponent,radix,tiny + ! Executable Statements + ! assume rounding, not chopping. always. + rnd = one + if( one==rnd ) then + eps = epsilon(zero) * 0.5 + else + eps = epsilon(zero) + end if + if( stdlib_lsame( cmach, 'E' ) ) then + rmach = eps + else if( stdlib_lsame( cmach, 'S' ) ) then + sfmin = tiny(zero) + small = one / huge(zero) + if( small>=sfmin ) then + ! use small plus a bit, to avoid the possibility of rounding + ! causing overflow when computing 1/sfmin. + sfmin = small*( one+eps ) + end if + rmach = sfmin + else if( stdlib_lsame( cmach, 'B' ) ) then + rmach = radix(zero) + else if( stdlib_lsame( cmach, 'P' ) ) then + rmach = eps * radix(zero) + else if( stdlib_lsame( cmach, 'N' ) ) then + rmach = digits(zero) + else if( stdlib_lsame( cmach, 'R' ) ) then + rmach = rnd + else if( stdlib_lsame( cmach, 'M' ) ) then + rmach = minexponent(zero) + else if( stdlib_lsame( cmach, 'U' ) ) then + rmach = tiny(zero) + else if( stdlib_lsame( cmach, 'L' ) ) then + rmach = maxexponent(zero) + else if( stdlib_lsame( cmach, 'O' ) ) then + rmach = huge(zero) + else + rmach = zero + end if + stdlib_slamch = rmach + return + end function stdlib_slamch + + + pure real(sp) function stdlib_slamc3( a, b ) + ! -- lapack auxiliary routine -- + ! univ. of tennessee, univ. of california berkeley and nag ltd.. + ! Scalar Arguments + real(sp), intent(in) :: a, b + ! ===================================================================== + ! Executable Statements + stdlib_slamc3 = a + b + return + end function stdlib_slamc3 + + !> SLAMRG: will create a permutation list which will merge the elements + !> of A (which is composed of two independently sorted sets) into a + !> single set which is sorted in ascending order. + + pure subroutine stdlib_slamrg( n1, n2, a, strd1, strd2, index ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n1, n2, strd1, strd2 + ! Array Arguments + integer(ilp), intent(out) :: index(*) + real(sp), intent(in) :: a(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ind1, ind2, n1sv, n2sv + ! Executable Statements + n1sv = n1 + n2sv = n2 + if( strd1>0 ) then + ind1 = 1 + else + ind1 = n1 + end if + if( strd2>0 ) then + ind2 = 1 + n1 + else + ind2 = n1 + n2 + end if + i = 1 + ! while ( (n1sv > 0) + 10 continue + if( n1sv>0 .and. n2sv>0 ) then + if( a( ind1 )<=a( ind2 ) ) then + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + strd1 + n1sv = n1sv - 1 + else + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + strd2 + n2sv = n2sv - 1 + end if + go to 10 + end if + ! end while + if( n1sv==0 ) then + do n1sv = 1, n2sv + index( i ) = ind2 + i = i + 1 + ind2 = ind2 + strd2 + end do + else + ! n2sv == 0 + do n2sv = 1, n1sv + index( i ) = ind1 + i = i + 1 + ind1 = ind1 + strd1 + end do + end if + return + end subroutine stdlib_slamrg + + !> SLAORHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine SORHR_COL. In SORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> SLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine SLAORHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, SLAORHR_COL_GETRFNP2 + !> is self-sufficient and can be used without SLAORHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_slaorhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: abs,sign,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_sscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_slaorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_strsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_strsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_slaorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_slaorhr_col_getrfnp2 + + !> SLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_slapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + real(sp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_slapmr + + !> SLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_slapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, j, in + real(sp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 100 + k( i ) = -k( i ) + j = k( i ) + 80 continue + if( j==i )go to 100 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 80 + 100 continue + end do + end if + return + end subroutine stdlib_slapmt + + !> SLAPY3: returns sqrt(x**2+y**2+z**2), taking care not to cause + !> unnecessary overflow and unnecessary underflow. + + pure real(sp) function stdlib_slapy3( x, y, z ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: x, y, z + ! ===================================================================== + + ! Local Scalars + real(sp) :: w, xabs, yabs, zabs, hugeval + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + hugeval = stdlib_slamch( 'OVERFLOW' ) + xabs = abs( x ) + yabs = abs( y ) + zabs = abs( z ) + w = max( xabs, yabs, zabs ) + if( w==zero .or. w>hugeval ) then + ! w can be zero for max(0,nan,0) + ! adding all three entries together will make sure + ! nan will not disappear. + stdlib_slapy3 = xabs + yabs + zabs + else + stdlib_slapy3 = w*sqrt( ( xabs / w )**2+( yabs / w )**2+( zabs / w )**2 ) + end if + return + end function stdlib_slapy3 + + !> SLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_slaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_slaqgb + + !> SLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_slaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: c(*), r(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_slaqge + + !> Given a 2-by-2 or 3-by-3 matrix H, SLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + !> scaling to avoid overflows and most underflows. It + !> is assumed that either + !> 1) sr1 = sr2 and si1 = -si2 + !> or + !> 2) si1 = si2 = 0. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_slaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: si1, si2, sr1, sr2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + real(sp), intent(in) :: h(ldh,*) + real(sp), intent(out) :: v(*) + ! ================================================================ + + ! Local Scalars + real(sp) :: h21s, h31s, s + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) - si1*( & + si2 / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) + end if + else + s = abs( h( 1, 1 )-sr2 ) + abs( si2 ) + abs( h( 2, 1 ) ) +abs( h( 3, 1 ) ) + if( s==zero ) then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-sr1 )*( ( h( 1, 1 )-sr2 ) / s ) -si1*( si2 / s ) + h( 1, 2 )& + *h21s + h( 1, 3 )*h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-sr1-sr2 ) +h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-sr1-sr2 ) +h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_slaqr1 + + !> SLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_slaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_slaqsb + + !> SLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_slaqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(inout) :: ap(*) + real(sp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_slaqsp + + !> SLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_slaqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: amax, scond + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: s(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: thresh = 0.1e+0_sp + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_slaqsy + + !> SLAR2V: applies a vector of real plane rotations from both sides to + !> a sequence of 2-by-2 real symmetric matrices, defined by the elements + !> of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) + !> ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_slar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(sp), intent(in) :: c(*), s(*) + real(sp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(sp) :: ci, si, t1, t2, t3, t4, t5, t6, xi, yi, zi + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( ix ) + zi = z( ix ) + ci = c( ic ) + si = s( ic ) + t1 = si*zi + t2 = ci*zi + t3 = t2 - si*xi + t4 = t2 + si*yi + t5 = ci*xi + t1 + t6 = ci*yi - t1 + x( ix ) = ci*t5 + si*t4 + y( ix ) = ci*t6 - si*t3 + z( ix ) = ci*t4 - si*t5 + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_slar2v + + !> SLARF: applies a real elementary reflector H to a real m by n matrix + !> C, from either the left or the right. H is represented in the form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + + pure subroutine stdlib_slarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + real(sp), intent(in) :: tau + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: v(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=zero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-zero row in v. + do while( lastv>0 .and. v( i )==zero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-zero column in c(1:lastv,:). + lastc = stdlib_ilaslc(lastv, n, c, ldc) + else + ! scan for the last non-zero row in c(:,1:lastv). + lastc = stdlib_ilaslr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_sp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**t * v(1:lastv,1) + call stdlib_sgemv( 'TRANSPOSE', lastv, lastc, one, c, ldc, v, incv,zero, work, 1 & + ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**t + call stdlib_sger( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_sgemv( 'NO TRANSPOSE', lastc, lastv, one, c, ldc,v, incv, zero, work,& + 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**t + call stdlib_sger( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_slarf + + !> SLARFB: applies a real block reflector H or its transpose H**T to a + !> real m by n matrix C, from either the left or the right. + + pure subroutine stdlib_slarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: t(ldt,*), v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_scopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2 + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c( k+1, 1 ),& + ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_strmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c2 := c2 - v2 * w**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v( k+1, 1 )& + , ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_scopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_strmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c2 := c2 - w * v2**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( k+1, 1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1**t + call stdlib_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v = (c1**t * v1 + c2**t * v2) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_scopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( m-k+& + 1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1 + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', n, k, m-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_strmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v * w**t + if( m>k ) then + ! c1 := c1 - v1 * w**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-k, n, k,-one, v, ldv, & + work, ldwork, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v( m-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h' where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_scopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( n-k+& + 1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,one, c, ldc, & + v, ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_strmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v**t + if( n>k ) then + ! c1 := c1 - w * v1**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2**t + call stdlib_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v( n-k+1, & + 1 ), ldv, work, ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c1**t + do j = 1, k + call stdlib_scopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', n, k,one, v, ldv, & + work, ldwork ) + if( m>k ) then + ! w := w + c2**t * v2**t + call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c( k+1, 1 ), & + ldc, v( 1, k+1 ), ldv, one,work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_strmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c2 := c2 - v2**t * w**t + call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v( 1, k+1 ), & + ldv, work, ldwork, one,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w**t + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_scopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**t + call stdlib_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'UNIT', m, k,one, v, ldv, & + work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c( 1, k+1 ),& + ldc, v( 1, k+1 ), ldv,one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_strmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v( 1, k+1 ), ldv, one,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, one, v, ldv,& + work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c where c = ( c1 ) + ! ( c2 ) + ! w := c**t * v**t = (c1**t * v1**t + c2**t * v2**t) (stored in work) + ! w := c2**t + do j = 1, k + call stdlib_scopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', n, k,one, v( 1, m-k+& + 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**t * v1**t + call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, m-k, one,c, ldc, v, ldv,& + one, work, ldwork ) + end if + ! w := w * t**t or w * t + call stdlib_strmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,one, t, ldt, & + work, ldwork ) + ! c := c - v**t * w**t + if( m>k ) then + ! c1 := c1 - v1**t * w**t + call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', m-k, n, k, -one,v, ldv, work, & + ldwork, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, one, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**t + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) - work( i, j ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t where c = ( c1 c2 ) + ! w := c * v**t = (c1*v1**t + c2*v2**t) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_scopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**t + call stdlib_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'UNIT', m, k,one, v( 1, n-k+& + 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, n-k,one, c, ldc, v, & + ldv, one, work, ldwork ) + end if + ! w := w * t or w * t**t + call stdlib_strmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,one, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-one, work, & + ldwork, v, ldv, one, c, ldc ) + end if + ! w := w * v2 + call stdlib_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, one, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_slarfb + + !> SLARFB_GETT: applies a real Householder block reflector H from the + !> left to a real (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_slarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_scopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_strmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_sgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1, k+1 ), ldb, one, work, & + ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_strmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_sgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1, k+& + 1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_strmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork ) + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_scopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = zero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_strmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_strmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_strmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_strmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_slarfb_gett + + !> SLARFT: forms the triangular factor T of a real block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + + pure subroutine stdlib_slarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(sp), intent(out) :: t(ldt,*) + real(sp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( i, prevlastv ) + if( tau( i )==zero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = zero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( i , j ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**t * v(i:j,i) + call stdlib_sgemv( 'TRANSPOSE', j-i, i-1, -tau( i ),v( i+1, 1 ), ldv, v( i+& + 1, i ), 1, one,t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=zero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**t + call stdlib_sgemv( 'NO TRANSPOSE', i-1, j-i, -tau( i ),v( 1, i+1 ), ldv, v(& + i, i+1 ), ldv,one, t( 1, i ), 1 ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_strmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_slarft + + !> SLARFX: applies a real elementary reflector H to a real m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_slarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + real(sp), intent(in) :: tau + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: v(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + real(sp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, v6, & + v7, v8, v9 + ! Executable Statements + if( tau==zero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_slarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_slarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = one - tau*v( 1 )*v( 1 ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*v1 + v2 = v( 2 ) + t2 = tau*v2 + v3 = v( 3 ) + t3 = tau*v3 + v4 = v( 4 ) + t4 = tau*v4 + v5 = v( 5 ) + t5 = tau*v5 + v6 = v( 6 ) + t6 = tau*v6 + v7 = v( 7 ) + t7 = tau*v7 + v8 = v( 8 ) + t8 = tau*v8 + v9 = v( 9 ) + t9 = tau*v9 + v10 = v( 10 ) + t10 = tau*v10 + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 return + end subroutine stdlib_slarfx + + !> SLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n symmetric matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_slarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + real(sp), intent(in) :: tau + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: v(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: alpha + ! Executable Statements + if( tau==zero )return + ! form w:= c * v + call stdlib_ssymv( uplo, n, one, c, ldc, v, incv, zero, work, 1 ) + alpha = -half*tau*stdlib_sdot( n, work, 1, v, incv ) + call stdlib_saxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_ssyr2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_slarfy + + !> SLARGV: generates a vector of real plane rotations, determined by + !> elements of the real vectors x and y. For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( a(i) ) + !> ( -s(i) c(i) ) ( y(i) ) = ( 0 ) + + pure subroutine stdlib_slargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(sp), intent(out) :: c(*) + real(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(sp) :: f, g, t, tt + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + loop_10: do i = 1, n + f = x( ix ) + g = y( iy ) + if( g==zero ) then + c( ic ) = one + else if( f==zero ) then + c( ic ) = zero + y( iy ) = one + x( ix ) = g + else if( abs( f )>abs( g ) ) then + t = g / f + tt = sqrt( one+t*t ) + c( ic ) = one / tt + y( iy ) = t*c( ic ) + x( ix ) = f*tt + else + t = f / g + tt = sqrt( one+t*t ) + y( iy ) = one / tt + c( ic ) = t*y( iy ) + x( ix ) = g*tt + end if + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_10 + return + end subroutine stdlib_slargv + + !> Compute the splitting points with threshold SPLTOL. + !> SLARRA: sets any "small" off-diagonal elements to zero. + + pure subroutine stdlib_slarra( n, d, e, e2, spltol, tnrm,nsplit, isplit, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, nsplit + integer(ilp), intent(in) :: n + real(sp), intent(in) :: spltol, tnrm + ! Array Arguments + integer(ilp), intent(out) :: isplit(*) + real(sp), intent(in) :: d(*) + real(sp), intent(inout) :: e(*), e2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: eabs, tmp1 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! compute splitting points + nsplit = 1 + if(spltol Find the number of eigenvalues of the symmetric tridiagonal matrix T + !> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T + !> if JOBT = 'L'. + + pure subroutine stdlib_slarrc( jobt, n, vl, vu, d, e, pivmin,eigcnt, lcnt, rcnt, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobt + integer(ilp), intent(out) :: eigcnt, info, lcnt, rcnt + integer(ilp), intent(in) :: n + real(sp), intent(in) :: pivmin, vl, vu + ! Array Arguments + real(sp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: matt + real(sp) :: lpivot, rpivot, sl, su, tmp, tmp2 + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + lcnt = 0 + rcnt = 0 + eigcnt = 0 + matt = stdlib_lsame( jobt, 'T' ) + if (matt) then + ! sturm sequence count on t + lpivot = d( 1 ) - vl + rpivot = d( 1 ) - vu + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + do i = 1, n-1 + tmp = e(i)**2 + lpivot = ( d( i+1 )-vl ) - tmp/lpivot + rpivot = ( d( i+1 )-vu ) - tmp/rpivot + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + end do + else + ! sturm sequence count on l d l^t + sl = -vl + su = -vu + do i = 1, n - 1 + lpivot = d( i ) + sl + rpivot = d( i ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + tmp = e(i) * d(i) * e(i) + tmp2 = tmp / lpivot + if( tmp2==zero ) then + sl = tmp - vl + else + sl = sl*tmp2 - vl + end if + tmp2 = tmp / rpivot + if( tmp2==zero ) then + su = tmp - vu + else + su = su*tmp2 - vu + end if + end do + lpivot = d( n ) + sl + rpivot = d( n ) + su + if( lpivot<=zero ) then + lcnt = lcnt + 1 + endif + if( rpivot<=zero ) then + rcnt = rcnt + 1 + endif + endif + eigcnt = rcnt - lcnt + return + end subroutine stdlib_slarrc + + !> SLARRD: computes the eigenvalues of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from SSTEMR. + !> The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_slarrd( range, order, n, vl, vu, il, iu, gers,reltol, d, e, e2, & + pivmin, nsplit, isplit,m, w, werr, wl, wu, iblock, indexw,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: order, range + integer(ilp), intent(in) :: il, iu, n, nsplit + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: pivmin, reltol, vl, vu + real(sp), intent(out) :: wl, wu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), indexw(*), iwork(*) + integer(ilp), intent(in) :: isplit(*) + real(sp), intent(in) :: d(*), e(*), e2(*), gers(*) + real(sp), intent(out) :: w(*), werr(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: fudge = two + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: valrng = 2 + integer(ilp), parameter :: indrng = 3 + + + ! Local Scalars + logical(lk) :: ncnvrg, toofew + integer(ilp) :: i, ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iout, & + irange, itmax, itmp1, itmp2, iw, iwoff, j, jblk, jdisc, je, jee, nb, nwl, nwu + real(sp) :: atoli, eps, gl, gu, rtoli, tmp1, tmp2, tnorm, uflow, wkill, wlu, & + wul + ! Local Arrays + integer(ilp) :: idumma(1) + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + else + irange = 0 + end if + ! check for errors + if( irange<=0 ) then + info = -1 + else if( .not.(stdlib_lsame(order,'B').or.stdlib_lsame(order,'E')) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( irange==valrng ) then + if( vl>=vu )info = -5 + else if( irange==indrng .and.( il<1 .or. il>max( 1, n ) ) ) then + info = -6 + else if( irange==indrng .and.( iun ) ) then + info = -7 + end if + if( info/=0 ) then + return + end if + ! initialize error flags + info = 0 + ncnvrg = .false. + toofew = .false. + ! quick return if possible + m = 0 + if( n==0 ) return + ! simplification: + if( irange==indrng .and. il==1 .and. iu==n ) irange = 1 + ! get machine constants + eps = stdlib_slamch( 'P' ) + uflow = stdlib_slamch( 'U' ) + ! special case when n=1 + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + endif + return + end if + ! nb is the minimum vector length for vector bisection, or 0 + ! if only scalar is to be done. + nb = stdlib_ilaenv( 1, 'SSTEBZ', ' ', n, -1, -1, -1 ) + if( nb<=1 ) nb = 0 + ! find global spectral radius + gl = d(1) + gu = d(1) + do i = 1,n + gl = min( gl, gers( 2*i - 1)) + gu = max( gu, gers(2*i) ) + end do + ! compute global gerschgorin bounds and spectral diameter + tnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*tnorm*eps*n - fudge*two*pivmin + gu = gu + fudge*tnorm*eps*n + fudge*two*pivmin + ! [jan/28/2009] remove the line below since spdiam variable not use + ! spdiam = gu - gl + ! input arguments for stdlib_slaebz: + ! the relative tolerance. an interval (a,b] lies within + ! "relative tolerance" if b-a < reltol*max(|a|,|b|), + rtoli = reltol + ! set the absolute tolerance for interval convergence to zero to force + ! interval convergence based on relative size of the interval. + ! this is dangerous because intervals might not converge when reltol is + ! small. but at least a very small number should be selected so that for + ! strongly graded matrices, the code can get relatively accurate + ! eigenvalues. + atoli = fudge*two*uflow + fudge*two*pivmin + if( irange==indrng ) then + ! range='i': compute an interval containing eigenvalues + ! il through iu. the initial interval [gl,gu] from the global + ! gerschgorin bounds gl and gu is refined by stdlib_slaebz. + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + work( n+1 ) = gl + work( n+2 ) = gl + work( n+3 ) = gu + work( n+4 ) = gu + work( n+5 ) = gl + work( n+6 ) = gu + iwork( 1 ) = -1 + iwork( 2 ) = -1 + iwork( 3 ) = n + 1 + iwork( 4 ) = n + 1 + iwork( 5 ) = il - 1 + iwork( 6 ) = iu + call stdlib_slaebz( 3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin,d, e, e2, iwork( 5 )& + , work( n+1 ), work( n+5 ), iout,iwork, w, iblock, iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! on exit, output intervals may not be ordered by ascending negcount + if( iwork( 6 )==iu ) then + wl = work( n+1 ) + wlu = work( n+3 ) + nwl = iwork( 1 ) + wu = work( n+4 ) + wul = work( n+2 ) + nwu = iwork( 4 ) + else + wl = work( n+2 ) + wlu = work( n+4 ) + nwl = iwork( 2 ) + wu = work( n+3 ) + wul = work( n+1 ) + nwu = iwork( 3 ) + end if + ! on exit, the interval [wl, wlu] contains a value with negcount nwl, + ! and [wul, wu] contains a value with negcount nwu. + if( nwl<0 .or. nwl>=n .or. nwu<1 .or. nwu>n ) then + info = 4 + return + end if + elseif( irange==valrng ) then + wl = vl + wu = vu + elseif( irange==allrng ) then + wl = gl + wu = gu + endif + ! find eigenvalues -- loop over blocks and recompute nwl and nwu. + ! nwl accumulates the number of eigenvalues .le. wl, + ! nwu accumulates the number of eigenvalues .le. wu + m = 0 + iend = 0 + info = 0 + nwl = 0 + nwu = 0 + loop_70: do jblk = 1, nsplit + ioff = iend + ibegin = ioff + 1 + iend = isplit( jblk ) + in = iend - ioff + if( in==1 ) then + ! 1x1 block + if( wl>=d( ibegin )-pivmin )nwl = nwl + 1 + if( wu>=d( ibegin )-pivmin )nwu = nwu + 1 + if( irange==allrng .or.( wl= d( ibegin )-pivmin ) ) & + then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + iblock( m ) = jblk + indexw( m ) = 1 + end if + ! disabled 2x2 case because of a failure on the following matrix + ! range = 'i', il = iu = 4 + ! original tridiagonal, d = [ + ! -0.150102010615740e+00_sp + ! -0.849897989384260e+00_sp + ! -0.128208148052635e-15_sp + ! 0.128257718286320e-15_sp + ! ]; + ! e = [ + ! -0.357171383266986e+00_sp + ! -0.180411241501588e-15_sp + ! -0.175152352710251e-15_sp + ! ]; + ! else if( in==2 ) then + ! * 2x2 block + ! disc = sqrt( (half*(d(ibegin)-d(iend)))**2 + e(ibegin)**2 ) + ! tmp1 = half*(d(ibegin)+d(iend)) + ! l1 = tmp1 - disc + ! if( wl>= l1-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l1-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l1-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l1 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 1 + ! endif + ! l2 = tmp1 + disc + ! if( wl>= l2-pivmin ) + ! $ nwl = nwl + 1 + ! if( wu>= l2-pivmin ) + ! $ nwu = nwu + 1 + ! if( irange==allrng .or. ( wl= + ! $ l2-pivmin ) ) then + ! m = m + 1 + ! w( m ) = l2 + ! * the uncertainty of eigenvalues of a 2x2 matrix is very small + ! werr( m ) = eps * abs( w( m ) ) * two + ! iblock( m ) = jblk + ! indexw( m ) = 2 + ! endif + else + ! general case - block of size in >= 2 + ! compute local gerschgorin interval and use it as the initial + ! interval for stdlib_slaebz + gu = d( ibegin ) + gl = d( ibegin ) + tmp1 = zero + do j = ibegin, iend + gl = min( gl, gers( 2*j - 1)) + gu = max( gu, gers(2*j) ) + end do + ! [jan/28/2009] + ! change spdiam by tnorm in lines 2 and 3 thereafter + ! line 1: remove computation of spdiam (not useful anymore) + ! spdiam = gu - gl + ! gl = gl - fudge*spdiam*eps*in - fudge*pivmin + ! gu = gu + fudge*spdiam*eps*in + fudge*pivmin + gl = gl - fudge*tnorm*eps*in - fudge*pivmin + gu = gu + fudge*tnorm*eps*in + fudge*pivmin + if( irange>1 ) then + if( gu=gu )cycle loop_70 + end if + ! find negcount of initial interval boundaries gl and gu + work( n+1 ) = gl + work( n+in+1 ) = gu + call stdlib_slaebz( 1, 0, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e( & + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), im,iwork, w( m+1 ),& + iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + nwl = nwl + iwork( 1 ) + nwu = nwu + iwork( in+1 ) + iwoff = m - iwork( 1 ) + ! compute eigenvalues + itmax = int( ( log( gu-gl+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + & + 2 + call stdlib_slaebz( 2, itmax, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e(& + ibegin ), e2( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), iout,iwork, w( m+& + 1 ), iblock( m+1 ), iinfo ) + if( iinfo /= 0 ) then + info = iinfo + return + end if + ! copy eigenvalues into w and iblock + ! use -jblk for block number for unconverged eigenvalues. + ! loop over the number of output intervals from stdlib_slaebz + do j = 1, iout + ! eigenvalue approximation is middle point of interval + tmp1 = half*( work( j+n )+work( j+in+n ) ) + ! semi length of error interval + tmp2 = half*abs( work( j+n )-work( j+in+n ) ) + if( j>iout-iinfo ) then + ! flag non-convergence. + ncnvrg = .true. + ib = -jblk + else + ib = jblk + end if + do je = iwork( j ) + 1 + iwoff,iwork( j+in ) + iwoff + w( je ) = tmp1 + werr( je ) = tmp2 + indexw( je ) = je - iwoff + iblock( je ) = ib + end do + end do + m = m + im + end if + end do loop_70 + ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu + ! if nwl+1 < il or nwu > iu, discard extra eigenvalues. + if( irange==indrng ) then + idiscl = il - 1 - nwl + idiscu = nwu - iu + if( idiscl>0 ) then + im = 0 + do je = 1, m + ! remove some of the smallest eigenvalues from the left so that + ! at the end idiscl =0. move all eigenvalues up to the left. + if( w( je )<=wlu .and. idiscl>0 ) then + idiscl = idiscl - 1 + else + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscu>0 ) then + ! remove some of the largest eigenvalues from the right so that + ! at the end idiscu =0. move all eigenvalues up to the left. + im=m+1 + do je = m, 1, -1 + if( w( je )>=wul .and. idiscu>0 ) then + idiscu = idiscu - 1 + else + im = im - 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + jee = 0 + do je = im, m + jee = jee + 1 + w( jee ) = w( je ) + werr( jee ) = werr( je ) + indexw( jee ) = indexw( je ) + iblock( jee ) = iblock( je ) + end do + m = m-im+1 + end if + if( idiscl>0 .or. idiscu>0 ) then + ! code to deal with effects of bad arithmetic. (if n(w) is + ! monotone non-decreasing, this should never happen.) + ! some low eigenvalues to be discarded are not in (wl,wlu], + ! or high eigenvalues to be discarded are not in (wul,wu] + ! so just kill off the smallest idiscl/largest idiscu + ! eigenvalues, by marking the corresponding iblock = 0 + if( idiscl>0 ) then + wkill = wu + do jdisc = 1, idiscl + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )0 ) then + wkill = wl + do jdisc = 1, idiscu + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )>=wkill .or. iw==0 ) ) then + iw = je + wkill = w( je ) + end if + end do + iblock( iw ) = 0 + end do + end if + ! now erase all eigenvalues with iblock set to zero + im = 0 + do je = 1, m + if( iblock( je )/=0 ) then + im = im + 1 + w( im ) = w( je ) + werr( im ) = werr( je ) + indexw( im ) = indexw( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl<0 .or. idiscu<0 ) then + toofew = .true. + end if + end if + if(( irange==allrng .and. m/=n ).or.( irange==indrng .and. m/=iu-il+1 ) ) then + toofew = .true. + end if + ! if order='b', do nothing the eigenvalues are already sorted by + ! block. + ! if order='e', sort the eigenvalues from smallest to largest + if( stdlib_lsame(order,'E') .and. nsplit>1 ) then + do je = 1, m - 1 + ie = 0 + tmp1 = w( je ) + do j = je + 1, m + if( w( j ) Given the initial eigenvalue approximations of T, SLARRJ: + !> does bisection to refine the eigenvalues of T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses in WERR. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_slarrj( n, d, e2, ifirst, ilast,rtol, offset, w, werr, work, iwork,& + pivmin, spdiam, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset + integer(ilp), intent(out) :: info + real(sp), intent(in) :: pivmin, rtol, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: d(*), e2(*) + real(sp), intent(inout) :: w(*), werr(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: cnt, i, i1, i2, ii, iter, j, k, next, nint, olnint, p, prev, & + savi1 + real(sp) :: dplus, fac, left, mid, right, s, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + i2 = ilast + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + loop_75: do i = i1, i2 + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + mid = w(ii) + right = w( ii ) + werr( ii ) + width = right - mid + tmp = max( abs( left ), abs( right ) ) + ! the following test prevents the test of converged intervals + if( width=i1).and.(i<=i2)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + ! make sure that [left,right] contains the desired eigenvalue + ! do while( cnt(left)>i-1 ) + fac = one + 20 continue + cnt = 0 + s = left + dplus = d( 1 ) - s + if( dplusi-1 ) then + left = left - werr( ii )*fac + fac = two*fac + go to 20 + end if + ! do while( cnt(right)0 ), i.e. there are still unconverged intervals + ! and while (iter=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + cnt = 0 + s = mid + dplus = d( 1 ) - s + if( dplus0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = savi1, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + return + end subroutine stdlib_slarrj + + !> SLARRK: computes one eigenvalue of a symmetric tridiagonal + !> matrix T to suitable accuracy. This is an auxiliary code to be + !> called from SSTEMR. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_slarrk( n, iw, gl, gu,d, e2, pivmin, reltol, w, werr, info) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: iw, n + real(sp), intent(in) :: pivmin, reltol, gl, gu + real(sp), intent(out) :: w, werr + ! Array Arguments + real(sp), intent(in) :: d(*), e2(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: fudge = two + + ! Local Scalars + integer(ilp) :: i, it, itmax, negcnt + real(sp) :: atoli, eps, left, mid, right, rtoli, tmp1, tmp2, tnorm + ! Intrinsic Functions + intrinsic :: abs,int,log,max + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! get machine constants + eps = stdlib_slamch( 'P' ) + tnorm = max( abs( gl ), abs( gu ) ) + rtoli = reltol + atoli = fudge*two*pivmin + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + info = -1 + left = gl - fudge*tnorm*eps*n - fudge*two*pivmin + right = gu + fudge*tnorm*eps*n + fudge*two*pivmin + it = 0 + 10 continue + ! check if interval converged or maximum number of iterations reached + tmp1 = abs( right - left ) + tmp2 = max( abs(right), abs(left) ) + if( tmp1itmax)goto 30 + ! count number of negative pivots for mid-point + it = it + 1 + mid = half * (left + right) + negcnt = 0 + tmp1 = d( 1 ) - mid + if( abs( tmp1 )=iw) then + right = mid + else + left = mid + endif + goto 10 + 30 continue + ! converged or maximum number of iterations reached + w = half * (left + right) + werr = half * abs( right - left ) + return + end subroutine stdlib_slarrk + + !> Perform tests to decide whether the symmetric tridiagonal matrix T + !> warrants expensive computations which guarantee high relative accuracy + !> in the eigenvalues. + + pure subroutine stdlib_slarrr( n, d, e, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: d(*) + real(sp), intent(inout) :: e(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: relcond = 0.999_sp + + ! Local Scalars + integer(ilp) :: i + logical(lk) :: yesrel + real(sp) :: eps, safmin, smlnum, rmin, tmp, tmp2, offdig, offdig2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + info = 0 + return + end if + ! as a default, do not go for relative-accuracy preserving computations. + info = 1 + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + rmin = sqrt( smlnum ) + ! tests for relative accuracy + ! test for scaled diagonal dominance + ! scale the diagonal entries to one and check whether the sum of the + ! off-diagonals is less than one + ! the sdd relative error bounds have a 1/(1- 2*x) factor in them, + ! x = max(offdig + offdig2), so when x is close to 1/2, no relative + ! accuracy is promised. in the notation of the code fragment below, + ! 1/(1 - (offdig + offdig2)) is the condition number. + ! we don't think it is worth going into "sdd mode" unless the relative + ! condition number is reasonable, not 1/macheps. + ! the threshold should be compatible with other thresholds used in the + ! code. we set offdig + offdig2 <= .999_sp =: relcond, it corresponds + ! to losing at most 3 decimal digits: 1 / (1 - (offdig + offdig2)) <= 1000 + ! instead of the current offdig + offdig2 < 1 + yesrel = .true. + offdig = zero + tmp = sqrt(abs(d(1))) + if (tmp=relcond) yesrel = .false. + if(.not.yesrel) goto 11 + tmp = tmp2 + offdig = offdig2 + end do + 11 continue + if( yesrel ) then + info = 0 + return + else + endif + ! *** more to be implemented *** + ! test if the lower bidiagonal matrix l from t = l d l^t + ! (zero shift facto) is well conditioned + ! test if the upper bidiagonal matrix u from t = u d u^t + ! (zero shift facto) is well conditioned. + ! in this case, the matrix needs to be flipped and, at the end + ! of the eigenvector computation, the flip needs to be applied + ! to the computed eigenvectors (and the support) + return + end subroutine stdlib_slarrr + + !> ! + !> + !> SLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -S C ] [ G ] [ 0 ] + !> where C**2 + S**2 = 1. + !> The mathematical formulas used for C and S are + !> R = sign(F) * sqrt(F**2 + G**2) + !> C = F / R + !> S = G / R + !> Hence C >= 0. The algorithm used to compute these quantities + !> incorporates scaling to avoid overflow or underflow in computing the + !> square root of the sum of squares. + !> This version is discontinuous in R at F = 0 but it returns the same + !> C and S as SLARTG for complex inputs (F,0) and (G,0). + !> This is a more accurate version of the BLAS1 routine SROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0 and (G .ne. 0), then C=0 and S=sign(1,G) without doing any + !> floating point operations (saves work in SBDSQR when + !> there are zeros on the diagonal). + !> If F exceeds G in magnitude, C will be positive. + !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. + + pure subroutine stdlib_slartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(sp), intent(out) :: c, r, s + real(sp), intent(in) :: f, g + ! Local Scalars + real(sp) :: d, f1, fs, g1, gs, p, u, uu + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + f1 = abs( f ) + g1 = abs( g ) + if( g == zero ) then + c = one + s = zero + r = f + else if( f == zero ) then + c = zero + s = sign( one, g ) + r = g1 + else if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) & + then + d = sqrt( f*f + g*g ) + p = one / d + c = f1*p + s = g*sign( p, f ) + r = sign( d, f ) + else + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + fs = f*uu + gs = g*uu + d = sqrt( fs*fs + gs*gs ) + p = one / d + c = abs( fs )*p + s = gs*sign( p, f ) + r = sign( d, f )*u + end if + return + end subroutine stdlib_slartg + + !> SLARTGP: generates a plane rotation so that + !> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + !> [ -SN CS ] [ G ] [ 0 ] + !> This is a slower, more accurate version of the Level 1 BLAS routine SROTG, + !> with the following other differences: + !> F and G are unchanged on return. + !> If G=0, then CS=(+/-)1 and SN=0. + !> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. + !> The sign is chosen so that R >= 0. + + pure subroutine stdlib_slartgp( f, g, cs, sn, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(out) :: cs, r, sn + real(sp), intent(in) :: f, g + ! ===================================================================== + + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i + real(sp) :: eps, f1, g1, safmin, safmn2, safmx2, scale + ! Intrinsic Functions + intrinsic :: abs,int,log,max,sign,sqrt + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Executable Statements + ! if( first ) then + safmin = stdlib_slamch( 'S' ) + eps = stdlib_slamch( 'E' ) + safmn2 = stdlib_slamch( 'B' )**int( log( safmin / eps ) /log( stdlib_slamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! first = .false. + ! end if + if( g==zero ) then + cs = sign( one, f ) + sn = zero + r = abs( f ) + else if( f==zero ) then + cs = zero + sn = sign( one, g ) + r = abs( g ) + else + f1 = f + g1 = g + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 ) then + count = 0 + 10 continue + count = count + 1 + f1 = f1*safmn2 + g1 = g1*safmn2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale>=safmx2 .and. count < 20)go to 10 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmx2 + end do + else if( scale<=safmn2 ) then + count = 0 + 30 continue + count = count + 1 + f1 = f1*safmx2 + g1 = g1*safmx2 + scale = max( abs( f1 ), abs( g1 ) ) + if( scale<=safmn2 )go to 30 + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + do i = 1, count + r = r*safmn2 + end do + else + r = sqrt( f1**2+g1**2 ) + cs = f1 / r + sn = g1 / r + end if + if( r SLARTGS: generates a plane rotation designed to introduce a bulge in + !> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD + !> problem. X and Y are the top-row entries, and SIGMA is the shift. + !> The computed CS and SN define a plane rotation satisfying + !> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], + !> [ -SN CS ] [ X * Y ] [ 0 ] + !> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the + !> rotation is by PI/2. + + pure subroutine stdlib_slartgs( x, y, sigma, cs, sn ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(out) :: cs, sn + real(sp), intent(in) :: sigma, x, y + ! =================================================================== + + ! Local Scalars + real(sp) :: r, s, thresh, w, z + thresh = stdlib_slamch('E') + ! compute the first column of b**t*b - sigma^2*i, up to a scale + ! factor. + if( (sigma == zero .and. abs(x) < thresh) .or.(abs(x) == sigma .and. y == zero) ) & + then + z = zero + w = zero + else if( sigma == zero ) then + if( x >= zero ) then + z = x + w = y + else + z = -x + w = -y + end if + else if( abs(x) < thresh ) then + z = -sigma*sigma + w = zero + else + if( x >= zero ) then + s = one + else + s = negone + end if + z = s * (abs(x)-sigma) * (s+sigma/x) + w = s * y + end if + ! generate the rotation. + ! call stdlib_slartgp( z, w, cs, sn, r ) might seem more natural; + ! reordering the arguments ensures that if z = 0 then the rotation + ! is by pi/2. + call stdlib_slartgp( w, z, sn, cs, r ) + return + ! end stdlib_slartgs + end subroutine stdlib_slartgs + + !> SLARTV: applies a vector of real plane rotations to elements of the + !> real vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -s(i) c(i) ) ( y(i) ) + + pure subroutine stdlib_slartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(sp), intent(in) :: c(*), s(*) + real(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + real(sp) :: xi, yi + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - s( ic )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_slartv + + !> SLARUV: returns a vector of n random real numbers from a uniform (0,1) + !> distribution (n <= 128). + !> This is an auxiliary routine called by SLARNV and CLARNV. + + pure subroutine stdlib_slaruv( iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(sp), intent(out) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + integer(ilp), parameter :: ipw2 = 4096 + real(sp), parameter :: r = one/ipw2 + + + + ! Local Scalars + integer(ilp) :: i, i1, i2, i3, i4, it1, it2, it3, it4, j + ! Local Arrays + integer(ilp) :: mm(lv,4) + ! Intrinsic Functions + intrinsic :: min,mod,real + ! Data Statements + mm(1,1:4)=[494,322,2508,2549] + mm(2,1:4)=[2637,789,3754,1145] + mm(3,1:4)=[255,1440,1766,2253] + mm(4,1:4)=[2008,752,3572,305] + mm(5,1:4)=[1253,2859,2893,3301] + mm(6,1:4)=[3344,123,307,1065] + mm(7,1:4)=[4084,1848,1297,3133] + mm(8,1:4)=[1739,643,3966,2913] + mm(9,1:4)=[3143,2405,758,3285] + mm(10,1:4)=[3468,2638,2598,1241] + mm(11,1:4)=[688,2344,3406,1197] + mm(12,1:4)=[1657,46,2922,3729] + mm(13,1:4)=[1238,3814,1038,2501] + mm(14,1:4)=[3166,913,2934,1673] + mm(15,1:4)=[1292,3649,2091,541] + mm(16,1:4)=[3422,339,2451,2753] + mm(17,1:4)=[1270,3808,1580,949] + mm(18,1:4)=[2016,822,1958,2361] + mm(19,1:4)=[154,2832,2055,1165] + mm(20,1:4)=[2862,3078,1507,4081] + mm(21,1:4)=[697,3633,1078,2725] + mm(22,1:4)=[1706,2970,3273,3305] + mm(23,1:4)=[491,637,17,3069] + mm(24,1:4)=[931,2249,854,3617] + mm(25,1:4)=[1444,2081,2916,3733] + mm(26,1:4)=[444,4019,3971,409] + mm(27,1:4)=[3577,1478,2889,2157] + mm(28,1:4)=[3944,242,3831,1361] + mm(29,1:4)=[2184,481,2621,3973] + mm(30,1:4)=[1661,2075,1541,1865] + mm(31,1:4)=[3482,4058,893,2525] + mm(32,1:4)=[657,622,736,1409] + mm(33,1:4)=[3023,3376,3992,3445] + mm(34,1:4)=[3618,812,787,3577] + mm(35,1:4)=[1267,234,2125,77] + mm(36,1:4)=[1828,641,2364,3761] + mm(37,1:4)=[164,4005,2460,2149] + mm(38,1:4)=[3798,1122,257,1449] + mm(39,1:4)=[3087,3135,1574,3005] + mm(40,1:4)=[2400,2640,3912,225] + mm(41,1:4)=[2870,2302,1216,85] + mm(42,1:4)=[3876,40,3248,3673] + mm(43,1:4)=[1905,1832,3401,3117] + mm(44,1:4)=[1593,2247,2124,3089] + mm(45,1:4)=[1797,2034,2762,1349] + mm(46,1:4)=[1234,2637,149,2057] + mm(47,1:4)=[3460,1287,2245,413] + mm(48,1:4)=[328,1691,166,65] + mm(49,1:4)=[2861,496,466,1845] + mm(50,1:4)=[1950,1597,4018,697] + mm(51,1:4)=[617,2394,1399,3085] + mm(52,1:4)=[2070,2584,190,3441] + mm(53,1:4)=[3331,1843,2879,1573] + mm(54,1:4)=[769,336,153,3689] + mm(55,1:4)=[1558,1472,2320,2941] + mm(56,1:4)=[2412,2407,18,929] + mm(57,1:4)=[2800,433,712,533] + mm(58,1:4)=[189,2096,2159,2841] + mm(59,1:4)=[287,1761,2318,4077] + mm(60,1:4)=[2045,2810,2091,721] + mm(61,1:4)=[1227,566,3443,2821] + mm(62,1:4)=[2838,442,1510,2249] + mm(63,1:4)=[209,41,449,2397] + mm(64,1:4)=[2770,1238,1956,2817] + mm(65,1:4)=[3654,1086,2201,245] + mm(66,1:4)=[3993,603,3137,1913] + mm(67,1:4)=[192,840,3399,1997] + mm(68,1:4)=[2253,3168,1321,3121] + mm(69,1:4)=[3491,1499,2271,997] + mm(70,1:4)=[2889,1084,3667,1833] + mm(71,1:4)=[2857,3438,2703,2877] + mm(72,1:4)=[2094,2408,629,1633] + mm(73,1:4)=[1818,1589,2365,981] + mm(74,1:4)=[688,2391,2431,2009] + mm(75,1:4)=[1407,288,1113,941] + mm(76,1:4)=[634,26,3922,2449] + mm(77,1:4)=[3231,512,2554,197] + mm(78,1:4)=[815,1456,184,2441] + mm(79,1:4)=[3524,171,2099,285] + mm(80,1:4)=[1914,1677,3228,1473] + mm(81,1:4)=[516,2657,4012,2741] + mm(82,1:4)=[164,2270,1921,3129] + mm(83,1:4)=[303,2587,3452,909] + mm(84,1:4)=[2144,2961,3901,2801] + mm(85,1:4)=[3480,1970,572,421] + mm(86,1:4)=[119,1817,3309,4073] + mm(87,1:4)=[3357,676,3171,2813] + mm(88,1:4)=[837,1410,817,2337] + mm(89,1:4)=[2826,3723,3039,1429] + mm(90,1:4)=[2332,2803,1696,1177] + mm(91,1:4)=[2089,3185,1256,1901] + mm(92,1:4)=[3780,184,3715,81] + mm(93,1:4)=[1700,663,2077,1669] + mm(94,1:4)=[3712,499,3019,2633] + mm(95,1:4)=[150,3784,1497,2269] + mm(96,1:4)=[2000,1631,1101,129] + mm(97,1:4)=[3375,1925,717,1141] + mm(98,1:4)=[1621,3912,51,249] + mm(99,1:4)=[3090,1398,981,3917] + mm(100,1:4)=[3765,1349,1978,2481] + mm(101,1:4)=[1149,1441,1813,3941] + mm(102,1:4)=[3146,2224,3881,2217] + mm(103,1:4)=[33,2411,76,2749] + mm(104,1:4)=[3082,1907,3846,3041] + mm(105,1:4)=[2741,3192,3694,1877] + mm(106,1:4)=[359,2786,1682,345] + mm(107,1:4)=[3316,382,124,2861] + mm(108,1:4)=[1749,37,1660,1809] + mm(109,1:4)=[185,759,3997,3141] + mm(110,1:4)=[2784,2948,479,2825] + mm(111,1:4)=[2202,1862,1141,157] + mm(112,1:4)=[2199,3802,886,2881] + mm(113,1:4)=[1364,2423,3514,3637] + mm(114,1:4)=[1244,2051,1301,1465] + mm(115,1:4)=[2020,2295,3604,2829] + mm(116,1:4)=[3160,1332,1888,2161] + mm(117,1:4)=[2785,1832,1836,3365] + mm(118,1:4)=[2772,2405,1990,361] + mm(119,1:4)=[1217,3638,2058,2685] + mm(120,1:4)=[1822,3661,692,3745] + mm(121,1:4)=[1245,327,1194,2325] + mm(122,1:4)=[2252,3660,20,3609] + mm(123,1:4)=[3904,716,3285,3821] + mm(124,1:4)=[2774,1842,2046,3537] + mm(125,1:4)=[997,3987,2107,517] + mm(126,1:4)=[2573,1368,3508,3017] + mm(127,1:4)=[1148,1848,3525,2141] + mm(128,1:4)=[545,2366,3801,1537] + ! Executable Statements + i1 = iseed( 1 ) + i2 = iseed( 2 ) + i3 = iseed( 3 ) + i4 = iseed( 4 ) + loop_10: do i = 1, min( n, lv ) + 20 continue + ! multiply the seed by i-th power of the multiplier modulo 2**48 + it4 = i4*mm( i, 4 ) + it3 = it4 / ipw2 + it4 = it4 - ipw2*it3 + it3 = it3 + i3*mm( i, 4 ) + i4*mm( i, 3 ) + it2 = it3 / ipw2 + it3 = it3 - ipw2*it2 + it2 = it2 + i2*mm( i, 4 ) + i3*mm( i, 3 ) + i4*mm( i, 2 ) + it1 = it2 / ipw2 + it2 = it2 - ipw2*it1 + it1 = it1 + i1*mm( i, 4 ) + i2*mm( i, 3 ) + i3*mm( i, 2 ) +i4*mm( i, 1 ) + it1 = mod( it1, ipw2 ) + ! convert 48-bit integer to a realnumber in the interval (0,1,KIND=sp) + x( i ) = r*( real( it1,KIND=sp)+r*( real( it2,KIND=sp)+r*( real( it3,KIND=sp)+& + r*real( it4,KIND=sp) ) ) ) + if (x( i )==1.0_sp) then + ! if a real number has n bits of precision, and the first + ! n bits of the 48-bit integer above happen to be all 1 (which + ! will occur about once every 2**n calls), then x( i ) will + ! be rounded to exactly one. in ieee single precision arithmetic, + ! this will happen relatively often since n = 24. + ! since x( i ) is not supposed to return exactly 0.0_sp or 1.0_sp, + ! the statistically correct thing to do in this situation is + ! simply to iterate again. + ! n.b. the case x( i ) = 0.0_sp should not be possible. + i1 = i1 + 2 + i2 = i2 + 2 + i3 = i3 + 2 + i4 = i4 + 2 + goto 20 + end if + end do loop_10 + ! return final value of seed + iseed( 1 ) = it1 + iseed( 2 ) = it2 + iseed( 3 ) = it3 + iseed( 4 ) = it4 + return + end subroutine stdlib_slaruv + + !> SLARZ: applies a real elementary reflector H to a real M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**T + !> where tau is a real scalar and v is a real vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> H is a product of k elementary reflectors as returned by STZRZF. + + pure subroutine stdlib_slarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + real(sp), intent(in) :: tau + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(in) :: v(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=zero ) then + ! w( 1:n ) = c( 1, 1:n ) + call stdlib_scopy( n, c, ldc, work, 1 ) + ! w( 1:n ) = w( 1:n ) + c( m-l+1:m, 1:n )**t * v( 1:l ) + call stdlib_sgemv( 'TRANSPOSE', l, n, one, c( m-l+1, 1 ), ldc, v,incv, one, work,& + 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_saxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**t + call stdlib_sger( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=zero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_scopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_sgemv( 'NO TRANSPOSE', m, l, one, c( 1, n-l+1 ), ldc,v, incv, one, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_saxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**t + call stdlib_sger( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_slarz + + !> SLARZB: applies a real block reflector H or its transpose H**T to + !> a real distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_slarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'T' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**t * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**t + do j = 1, k + call stdlib_scopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**t * v( 1:k, 1:l )**t + if( l>0 )call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', n, k, l, one,c( m-l+1, 1 ), & + ldc, v, ldv, one, work, ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_strmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, one, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**t + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**t * w( 1:n, 1:k )**t + if( l>0 )call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -one, v, ldv,work, & + ldwork, one, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**t + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_scopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**t + if( l>0 )call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, one,c( 1, n-l+1 ),& + ldc, v, ldv, one, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * t or w( 1:m, 1:k ) * t**t + call stdlib_strmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, one, t,ldt, work, & + ldwork ) + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * v( 1:k, 1:l ) + if( l>0 )call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -one,work, & + ldwork, v, ldv, one, c( 1, n-l+1 ), ldc ) + end if + return + end subroutine stdlib_slarzb + + !> SLARZT: forms the triangular factor T of a real block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**T + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**T * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_slarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + real(sp), intent(out) :: t(ldt,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==zero ) then + ! h(i) = i + do j = i, k + t( j, i ) = zero + end do + else + ! general case + if( i SLAS2: computes the singular values of the 2-by-2 matrix + !> [ F G ] + !> [ 0 H ]. + !> On return, SSMIN is the smaller singular value and SSMAX is the + !> larger singular value. + + pure subroutine stdlib_slas2( f, g, h, ssmin, ssmax ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: f, g, h + real(sp), intent(out) :: ssmax, ssmin + ! ==================================================================== + + + + ! Local Scalars + real(sp) :: as, at, au, c, fa, fhmn, fhmx, ga, ha + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + fa = abs( f ) + ga = abs( g ) + ha = abs( h ) + fhmn = min( fa, ha ) + fhmx = max( fa, ha ) + if( fhmn==zero ) then + ssmin = zero + if( fhmx==zero ) then + ssmax = ga + else + ssmax = max( fhmx, ga )*sqrt( one+( min( fhmx, ga ) / max( fhmx, ga ) )**2 ) + + end if + else + if( ga This subroutine computes the square root of the I-th eigenvalue + !> of a positive symmetric rank-one modification of a 2-by-2 diagonal + !> matrix + !> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + !> The diagonal entries in the array D are assumed to satisfy + !> 0 <= D(i) < D(j) for i < j . + !> We also assume RHO > 0 and that the Euclidean norm of the vector + !> Z is one. + + pure subroutine stdlib_slasd5( i, d, z, delta, rho, dsigma, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i + real(sp), intent(out) :: dsigma + real(sp), intent(in) :: rho + ! Array Arguments + real(sp), intent(in) :: d(2), z(2) + real(sp), intent(out) :: delta(2), work(2) + ! ===================================================================== + + ! Local Scalars + real(sp) :: b, c, del, delsq, tau, w + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + del = d( 2 ) - d( 1 ) + delsq = del*( d( 2 )+d( 1 ) ) + if( i==1 ) then + w = one + four*rho*( z( 2 )*z( 2 ) / ( d( 1 )+three*d( 2 ) )-z( 1 )*z( 1 ) / ( & + three*d( 1 )+d( 2 ) ) ) / del + if( w>zero ) then + b = delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 1 )*z( 1 )*delsq + ! b > zero, always + ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) + tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) + ! the following tau is dsigma - d( 1 ) + tau = tau / ( d( 1 )+sqrt( d( 1 )*d( 1 )+tau ) ) + dsigma = d( 1 ) + tau + delta( 1 ) = -tau + delta( 2 ) = del - tau + work( 1 ) = two*d( 1 ) + tau + work( 2 ) = ( d( 1 )+tau ) + d( 2 ) + ! delta( 1 ) = -z( 1 ) / tau + ! delta( 2 ) = z( 2 ) / ( del-tau ) + else + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = -two*c / ( b+sqrt( b*b+four*c ) ) + else + tau = ( b-sqrt( b*b+four*c ) ) / two + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( abs( d( 2 )*d( 2 )+tau ) ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + end if + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + else + ! now i=2 + b = -delsq + rho*( z( 1 )*z( 1 )+z( 2 )*z( 2 ) ) + c = rho*z( 2 )*z( 2 )*delsq + ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) + if( b>zero ) then + tau = ( b+sqrt( b*b+four*c ) ) / two + else + tau = two*c / ( -b+sqrt( b*b+four*c ) ) + end if + ! the following tau is dsigma - d( 2 ) + tau = tau / ( d( 2 )+sqrt( d( 2 )*d( 2 )+tau ) ) + dsigma = d( 2 ) + tau + delta( 1 ) = -( del+tau ) + delta( 2 ) = -tau + work( 1 ) = d( 1 ) + tau + d( 2 ) + work( 2 ) = two*d( 2 ) + tau + ! delta( 1 ) = -z( 1 ) / ( del+tau ) + ! delta( 2 ) = -z( 2 ) / tau + ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) + ! delta( 1 ) = delta( 1 ) / temp + ! delta( 2 ) = delta( 2 ) / temp + end if + return + end subroutine stdlib_slasd5 + + !> SLASDT: creates a tree of subproblems for bidiagonal divide and + !> conquer. + + pure subroutine stdlib_slasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: lvl, nd + integer(ilp), intent(in) :: msub, n + ! Array Arguments + integer(ilp), intent(out) :: inode(*), ndiml(*), ndimr(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, il, ir, llst, maxn, ncrnt, nlvl + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: int,log,max,real + ! Executable Statements + ! find the number of levels on the tree. + maxn = max( 1, n ) + temp = log( real( maxn,KIND=sp) / real( msub+1,KIND=sp) ) / log( two ) + lvl = int( temp,KIND=ilp) + 1 + i = n / 2 + inode( 1 ) = i + 1 + ndiml( 1 ) = i + ndimr( 1 ) = n - i - 1 + il = 0 + ir = 1 + llst = 1 + do nlvl = 1, lvl - 1 + ! constructing the tree at (nlvl+1)-st level. the number of + ! nodes created on this level is llst * 2. + do i = 0, llst - 1 + il = il + 2 + ir = ir + 2 + ncrnt = llst + i + ndiml( il ) = ndiml( ncrnt ) / 2 + ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1 + inode( il ) = inode( ncrnt ) - ndimr( il ) - 1 + ndiml( ir ) = ndimr( ncrnt ) / 2 + ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1 + inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1 + end do + llst = llst*2 + end do + nd = llst*2 - 1 + return + end subroutine stdlib_slasdt + + !> SLASET: initializes an m-by-n matrix A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_slaset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(in) :: alpha, beta + ! Array Arguments + real(sp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the strictly upper triangular or trapezoidal part of the + ! array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the strictly lower triangular or trapezoidal part of the + ! array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + else + ! set the leading m-by-n submatrix to alpha. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + end if + ! set the first min(m,n) diagonal elements to beta. + do i = 1, min( m, n ) + a( i, i ) = beta + end do + return + end subroutine stdlib_slaset + + !> SLASQ4: computes an approximation TAU to the smallest eigenvalue + !> using values of d from the previous transform. + + pure subroutine stdlib_slasq4( i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn,dn1, dn2, tau, & + ttype, g ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i0, n0, n0in, pp + integer(ilp), intent(out) :: ttype + real(sp), intent(in) :: dmin, dmin1, dmin2, dn, dn1, dn2 + real(sp), intent(inout) :: g + real(sp), intent(out) :: tau + ! Array Arguments + real(sp), intent(in) :: z(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: cnst1 = 0.5630_sp + real(sp), parameter :: cnst2 = 1.010_sp + real(sp), parameter :: cnst3 = 1.050_sp + real(sp), parameter :: qurtr = 0.250_sp + real(sp), parameter :: third = 0.3330_sp + real(sp), parameter :: hundrd = 100.0_sp + + + ! Local Scalars + integer(ilp) :: i4, nn, np + real(sp) :: a2, b1, b2, gam, gap1, gap2, s + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! a negative dmin forces the shift to take that absolute value + ! ttype records the type of shift. + if( dmin<=zero ) then + tau = -dmin + ttype = -1 + return + end if + nn = 4*n0 + pp + if( n0in==n0 ) then + ! no eigenvalues deflated. + if( dmin==dn .or. dmin==dn1 ) then + b1 = sqrt( z( nn-3 ) )*sqrt( z( nn-5 ) ) + b2 = sqrt( z( nn-7 ) )*sqrt( z( nn-9 ) ) + a2 = z( nn-7 ) + z( nn-5 ) + ! cases 2 and 3. + if( dmin==dn .and. dmin1==dn1 ) then + gap2 = dmin2 - a2 - dmin2*qurtr + if( gap2>zero .and. gap2>b2 ) then + gap1 = a2 - dn - ( b2 / gap2 )*b2 + else + gap1 = a2 - dn - ( b1+b2 ) + end if + if( gap1>zero .and. gap1>b1 ) then + s = max( dn-( b1 / gap1 )*b1, half*dmin ) + ttype = -2 + else + s = zero + if( dn>b1 )s = dn - b1 + if( a2>( b1+b2 ) )s = min( s, a2-( b1+b2 ) ) + s = max( s, third*dmin ) + ttype = -3 + end if + else + ! case 4. + ttype = -4 + s = qurtr*dmin + if( dmin==dn ) then + gam = dn + a2 = zero + if( z( nn-5 ) > z( nn-7 ) )return + b2 = z( nn-5 ) / z( nn-7 ) + np = nn - 9 + else + np = nn - 2*pp + gam = dn1 + if( z( np-4 ) > z( np-2 ) )return + a2 = z( np-4 ) / z( np-2 ) + if( z( nn-9 ) > z( nn-11 ) )return + b2 = z( nn-9 ) / z( nn-11 ) + np = nn - 13 + end if + ! approximate contribution to norm squared from i < nn-1. + a2 = a2 + b2 + do i4 = np, 4*i0 - 1 + pp, -4 + if( b2==zero )go to 20 + b1 = b2 + if( z( i4 ) > z( i4-2 ) )return + b2 = b2*( z( i4 ) / z( i4-2 ) ) + a2 = a2 + b2 + if( hundrd*max( b2, b1 ) nn-2. + np = nn - 2*pp + b1 = z( np-2 ) + b2 = z( np-6 ) + gam = dn2 + if( z( np-8 )>b2 .or. z( np-4 )>b1 )return + a2 = ( z( np-8 ) / b2 )*( one+z( np-4 ) / b1 ) + ! approximate contribution to norm squared from i < nn-2. + if( n0-i0>2 ) then + b2 = z( nn-13 ) / z( nn-15 ) + a2 = a2 + b2 + do i4 = nn - 17, 4*i0 - 1 + pp, -4 + if( b2==zero )go to 40 + b1 = b2 + if( z( i4 ) > z( i4-2 ) )return + b2 = b2*( z( i4 ) / z( i4-2 ) ) + a2 = a2 + b2 + if( hundrd*max( b2, b1 )z( nn-7 ) )return + b1 = z( nn-5 ) / z( nn-7 ) + b2 = b1 + if( b2==zero )go to 60 + do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4 + a2 = b1 + if( z( i4 )>z( i4-2 ) )return + b1 = b1*( z( i4 ) / z( i4-2 ) ) + b2 = b2 + b1 + if( hundrd*max( b1, a2 )zero .and. gap2>b2*a2 ) then + s = max( s, a2*( one-cnst2*a2*( b2 / gap2 )*b2 ) ) + else + s = max( s, a2*( one-cnst2*b2 ) ) + ttype = -8 + end if + else + ! case 9. + s = qurtr*dmin1 + if( dmin1==dn1 )s = half*dmin1 + ttype = -9 + end if + else if( n0in==( n0+2 ) ) then + ! two eigenvalues deflated. use dmin2, dn2 for dmin and dn. + ! cases 10 and 11. + if( dmin2==dn2 .and. two*z( nn-5 )z( nn-7 ) )return + b1 = z( nn-5 ) / z( nn-7 ) + b2 = b1 + if( b2==zero )go to 80 + do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4 + if( z( i4 )>z( i4-2 ) )return + b1 = b1*( z( i4 ) / z( i4-2 ) ) + b2 = b2 + b1 + if( hundrd*b1zero .and. gap2>b2*a2 ) then + s = max( s, a2*( one-cnst2*a2*( b2 / gap2 )*b2 ) ) + else + s = max( s, a2*( one-cnst2*b2 ) ) + end if + else + s = qurtr*dmin2 + ttype = -11 + end if + else if( n0in>( n0+2 ) ) then + ! case 12, more than two eigenvalues deflated. no information. + s = zero + ttype = -12 + end if + tau = s + return + end subroutine stdlib_slasq4 + + !> SLASQ5: computes one dqds transform in ping-pong form, one + !> version for IEEE machines another for non IEEE machines. + + pure subroutine stdlib_slasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2,dn, dnm1, dnm2, & + ieee, eps ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0, n0, pp + real(sp), intent(out) :: dmin, dmin1, dmin2, dn, dnm1, dnm2 + real(sp), intent(inout) :: tau + real(sp), intent(in) :: sigma, eps + ! Array Arguments + real(sp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j4, j4p2 + real(sp) :: d, emin, temp, dthresh + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( ( n0-i0-1 )<=0 )return + dthresh = eps*(sigma+tau) + if( tau SLASQ6: computes one dqd (shift equal to zero) transform in + !> ping-pong form, with protection against underflow and overflow. + + pure subroutine stdlib_slasq6( i0, n0, z, pp, dmin, dmin1, dmin2, dn,dnm1, dnm2 ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i0, n0, pp + real(sp), intent(out) :: dmin, dmin1, dmin2, dn, dnm1, dnm2 + ! Array Arguments + real(sp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j4, j4p2 + real(sp) :: d, emin, safmin, temp + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( ( n0-i0-1 )<=0 )return + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + j4 = 4*i0 + pp - 3 + emin = z( j4+4 ) + d = z( j4 ) + dmin = d + if( pp==0 ) then + do j4 = 4*i0, 4*( n0-3 ), 4 + z( j4-2 ) = d + z( j4-1 ) + if( z( j4-2 )==zero ) then + z( j4 ) = zero + d = z( j4+1 ) + dmin = d + emin = zero + else if( safmin*z( j4+1 ) SLASR: applies a sequence of plane rotations to a real matrix A, + !> from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + + pure subroutine stdlib_slasr( side, pivot, direct, m, n, c, s, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, pivot, side + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: c(*), s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + real(sp) :: ctemp, stemp, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.( stdlib_lsame( side, 'L' ) .or. stdlib_lsame( side, 'R' ) ) ) then + info = 1 + else if( .not.( stdlib_lsame( pivot, 'V' ) .or. stdlib_lsame( pivot,'T' ) .or. & + stdlib_lsame( pivot, 'B' ) ) ) then + info = 2 + else if( .not.( stdlib_lsame( direct, 'F' ) .or. stdlib_lsame( direct, 'B' ) ) )& + then + info = 3 + else if( m<0 ) then + info = 4 + else if( n<0 ) then + info = 5 + else if( lda Sort the numbers in D in increasing order (if ID = 'I') or + !> in decreasing order (if ID = 'D' ). + !> Use Quick Sort, reverting to Insertion sort on arrays of + !> size <= 20. Dimension of STACK limits N to about 2**32. + + pure subroutine stdlib_slasrt( id, n, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: id + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: d(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: select = 20 + + ! Local Scalars + integer(ilp) :: dir, endd, i, j, start, stkpnt + real(sp) :: d1, d2, d3, dmnmx, tmp + ! Local Arrays + integer(ilp) :: stack(2,32) + ! Executable Statements + ! test the input parameters. + info = 0 + dir = -1 + if( stdlib_lsame( id, 'D' ) ) then + dir = 0 + else if( stdlib_lsame( id, 'I' ) ) then + dir = 1 + end if + if( dir==-1 ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLASRT', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + stkpnt = 1 + stack( 1, 1 ) = 1 + stack( 2, 1 ) = n + 10 continue + start = stack( 1, stkpnt ) + endd = stack( 2, stkpnt ) + stkpnt = stkpnt - 1 + if( endd-start<=select .and. endd-start>0 ) then + ! do insertion sort on d( start:endd ) + if( dir==0 ) then + ! sort into decreasing order + loop_30: do i = start + 1, endd + do j = i, start + 1, -1 + if( d( j )>d( j-1 ) ) then + dmnmx = d( j ) + d( j ) = d( j-1 ) + d( j-1 ) = dmnmx + else + cycle loop_30 + end if + end do + end do loop_30 + else + ! sort into increasing order + loop_50: do i = start + 1, endd + do j = i, start + 1, -1 + if( d( j )select ) then + ! partition d( start:endd ) and stack parts, largest one first + ! choose partition entry as median of 3 + d1 = d( start ) + d2 = d( endd ) + i = ( start+endd ) / 2 + d3 = d( i ) + if( d1dmnmx )go to 80 + if( iendd-j-1 ) then + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + else + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + end if + else + ! sort into increasing order + i = start - 1 + j = endd + 1 + 90 continue + 100 continue + j = j - 1 + if( d( j )>dmnmx )go to 100 + 110 continue + i = i + 1 + if( d( i )endd-j-1 ) then + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + else + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = j + 1 + stack( 2, stkpnt ) = endd + stkpnt = stkpnt + 1 + stack( 1, stkpnt ) = start + stack( 2, stkpnt ) = j + end if + end if + end if + if( stkpnt>0 )go to 10 + return + end subroutine stdlib_slasrt + + !> ! + !> + !> SLASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + + pure subroutine stdlib_slassq( n, x, incx, scl, sumsq ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(inout) :: scl, sumsq + ! Array Arguments + real(sp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(sp) :: abig, amed, asml, ax, ymax, ymin + ! quick return if possible + if( ieee_is_nan(scl) .or. ieee_is_nan(sumsq) ) return + if( sumsq == zero ) scl = one + if( scl == zero ) then + scl = one + sumsq = zero + end if + if (n <= 0) then + return + end if + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(x(ix)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! put the existing sum of squares into one of the accumulators + if( sumsq > zero ) then + ax = scl*sqrt( sumsq ) + if (ax > tbig) then + ! we assume scl >= sqrt( tiny*eps ) / sbig + abig = abig + (scl*sbig)**2 * sumsq + else if (ax < tsml) then + ! we assume scl <= sqrt( huge ) / ssml + if (notbig) asml = asml + (scl*ssml)**2 * sumsq + else + amed = amed + scl**2 * sumsq + end if + end if + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range or zero + scl = one + sumsq = amed + end if + return + end subroutine stdlib_slassq + + !> SLASV2: computes the singular value decomposition of a 2-by-2 + !> triangular matrix + !> [ F G ] + !> [ 0 H ]. + !> On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + !> smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + !> right singular vectors for abs(SSMAX), giving the decomposition + !> [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + !> [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + pure subroutine stdlib_slasv2( f, g, h, ssmin, ssmax, snr, csr, snl, csl ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(out) :: csl, csr, snl, snr, ssmax, ssmin + real(sp), intent(in) :: f, g, h + ! ===================================================================== + + + + + + ! Local Scalars + logical(lk) :: gasmal, swap + integer(ilp) :: pmax + real(sp) :: a, clt, crt, d, fa, ft, ga, gt, ha, ht, l, m, mm, r, s, slt, srt, t, temp, & + tsign, tt + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ft = f + fa = abs( ft ) + ht = h + ha = abs( h ) + ! pmax points to the maximum absolute element of matrix + ! pmax = 1 if f largest in absolute values + ! pmax = 2 if g largest in absolute values + ! pmax = 3 if h largest in absolute values + pmax = 1 + swap = ( ha>fa ) + if( swap ) then + pmax = 3 + temp = ft + ft = ht + ht = temp + temp = fa + fa = ha + ha = temp + ! now fa .ge. ha + end if + gt = g + ga = abs( gt ) + if( ga==zero ) then + ! diagonal matrix + ssmin = ha + ssmax = fa + clt = one + crt = one + slt = zero + srt = zero + else + gasmal = .true. + if( ga>fa ) then + pmax = 2 + if( ( fa / ga )one ) then + ssmin = fa / ( ga / ha ) + else + ssmin = ( fa / ga )*ha + end if + clt = one + slt = ht / gt + srt = one + crt = ft / gt + end if + end if + if( gasmal ) then + ! normal case + d = fa - ha + if( d==fa ) then + ! copes with infinite f or h + l = one + else + l = d / fa + end if + ! note that 0 .le. l .le. 1 + m = gt / ft + ! note that abs(m) .le. 1/macheps + t = two - l + ! note that t .ge. 1 + mm = m*m + tt = t*t + s = sqrt( tt+mm ) + ! note that 1 .le. s .le. 1 + 1/macheps + if( l==zero ) then + r = abs( m ) + else + r = sqrt( l*l+mm ) + end if + ! note that 0 .le. r .le. 1 + 1/macheps + a = half*( s+r ) + ! note that 1 .le. a .le. 1 + abs(m) + ssmin = ha / a + ssmax = fa*a + if( mm==zero ) then + ! note that m is very tiny + if( l==zero ) then + t = sign( two, ft )*sign( one, gt ) + else + t = gt / sign( d, ft ) + m / t + end if + else + t = ( m / ( s+t )+m / ( r+l ) )*( one+a ) + end if + l = sqrt( t*t+four ) + crt = two / l + srt = t / l + clt = ( crt+srt*m ) / a + slt = ( ht / ft )*srt / a + end if + end if + if( swap ) then + csl = srt + snl = crt + csr = slt + snr = clt + else + csl = clt + snl = slt + csr = crt + snr = srt + end if + ! correct signs of ssmax and ssmin + if( pmax==1 )tsign = sign( one, csr )*sign( one, csl )*sign( one, f ) + if( pmax==2 )tsign = sign( one, snr )*sign( one, csl )*sign( one, g ) + if( pmax==3 )tsign = sign( one, snr )*sign( one, snl )*sign( one, h ) + ssmax = sign( ssmax, tsign ) + ssmin = sign( ssmin, tsign*sign( one, f )*sign( one, h ) ) + return + end subroutine stdlib_slasv2 + + !> SLASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + + pure subroutine stdlib_slaswp( n, a, lda, k1, k2, ipiv, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k1, k2, lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32 + real(sp) :: temp + ! Executable Statements + ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows + ! k1 through k2. + if( incx>0 ) then + ix0 = k1 + i1 = k1 + i2 = k2 + inc = 1 + else if( incx<0 ) then + ix0 = k1 + ( k1-k2 )*incx + i1 = k2 + i2 = k1 + inc = -1 + else + return + end if + n32 = ( n / 32 )*32 + if( n32/=0 ) then + do j = 1, n32, 32 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = j, j + 31 + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end do + end if + if( n32/=n ) then + n32 = n32 + 1 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = n32, n + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end if + return + end subroutine stdlib_slaswp + + !> SLASY2: solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + !> op(TL)*X + ISGN*X*op(TR) = SCALE*B, + !> where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + !> -1. op(T) = T or T**T, where T**T denotes the transpose of T. + + pure subroutine stdlib_slasy2( ltranl, ltranr, isgn, n1, n2, tl, ldtl, tr,ldtr, b, ldb, & + scale, x, ldx, xnorm, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ltranl, ltranr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, ldb, ldtl, ldtr, ldx, n1, n2 + real(sp), intent(out) :: scale, xnorm + ! Array Arguments + real(sp), intent(in) :: b(ldb,*), tl(ldtl,*), tr(ldtr,*) + real(sp), intent(out) :: x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: bswap, xswap + integer(ilp) :: i, ip, ipiv, ipsv, j, jp, jpsv, k + real(sp) :: bet, eps, gam, l21, sgn, smin, smlnum, tau1, temp, u11, u12, u22, & + xmax + ! Local Arrays + logical(lk) :: bswpiv(4), xswpiv(4) + integer(ilp) :: jpiv(4), locl21(4), locu12(4), locu22(4) + real(sp) :: btmp(4), t16(4,4), tmp(4), x2(2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Data Statements + locu12 = [3,4,1,2] + locl21 = [2,1,4,3] + locu22 = [4,3,2,1] + xswpiv = [.false.,.false.,.true.,.true.] + bswpiv = [.false.,.true.,.false.,.true.] + ! Executable Statements + ! do not check the input parameters for errors + info = 0 + ! quick return if possible + if( n1==0 .or. n2==0 )return + ! set constants to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + sgn = isgn + k = n1 + n1 + n2 - 2 + go to ( 10, 20, 30, 50 )k + ! 1 by 1: tl11*x + sgn*x*tr11 = b11 + 10 continue + tau1 = tl( 1, 1 ) + sgn*tr( 1, 1 ) + bet = abs( tau1 ) + if( bet<=smlnum ) then + tau1 = smlnum + bet = smlnum + info = 1 + end if + scale = one + gam = abs( b( 1, 1 ) ) + if( smlnum*gam>bet )scale = one / gam + x( 1, 1 ) = ( b( 1, 1 )*scale ) / tau1 + xnorm = abs( x( 1, 1 ) ) + return + ! 1 by 2: + ! tl11*[x11 x12] + isgn*[x11 x12]*op[tr11 tr12] = [b11 b12] + ! [tr21 tr22] + 20 continue + smin = max( eps*max( abs( tl( 1, 1 ) ), abs( tr( 1, 1 ) ),abs( tr( 1, 2 ) ), abs( tr( & + 2, 1 ) ), abs( tr( 2, 2 ) ) ),smlnum ) + tmp( 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + tmp( 4 ) = tl( 1, 1 ) + sgn*tr( 2, 2 ) + if( ltranr ) then + tmp( 2 ) = sgn*tr( 2, 1 ) + tmp( 3 ) = sgn*tr( 1, 2 ) + else + tmp( 2 ) = sgn*tr( 1, 2 ) + tmp( 3 ) = sgn*tr( 2, 1 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 1, 2 ) + go to 40 + ! 2 by 1: + ! op[tl11 tl12]*[x11] + isgn* [x11]*tr11 = [b11] + ! [tl21 tl22] [x21] [x21] [b21] + 30 continue + smin = max( eps*max( abs( tr( 1, 1 ) ), abs( tl( 1, 1 ) ),abs( tl( 1, 2 ) ), abs( tl( & + 2, 1 ) ), abs( tl( 2, 2 ) ) ),smlnum ) + tmp( 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + tmp( 4 ) = tl( 2, 2 ) + sgn*tr( 1, 1 ) + if( ltranl ) then + tmp( 2 ) = tl( 1, 2 ) + tmp( 3 ) = tl( 2, 1 ) + else + tmp( 2 ) = tl( 2, 1 ) + tmp( 3 ) = tl( 1, 2 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 2, 1 ) + 40 continue + ! solve 2 by 2 system using complete pivoting. + ! set pivots less than smin to smin. + ipiv = stdlib_isamax( 4, tmp, 1 ) + u11 = tmp( ipiv ) + if( abs( u11 )<=smin ) then + info = 1 + u11 = smin + end if + u12 = tmp( locu12( ipiv ) ) + l21 = tmp( locl21( ipiv ) ) / u11 + u22 = tmp( locu22( ipiv ) ) - u12*l21 + xswap = xswpiv( ipiv ) + bswap = bswpiv( ipiv ) + if( abs( u22 )<=smin ) then + info = 1 + u22 = smin + end if + if( bswap ) then + temp = btmp( 2 ) + btmp( 2 ) = btmp( 1 ) - l21*temp + btmp( 1 ) = temp + else + btmp( 2 ) = btmp( 2 ) - l21*btmp( 1 ) + end if + scale = one + if( ( two*smlnum )*abs( btmp( 2 ) )>abs( u22 ) .or.( two*smlnum )*abs( btmp( 1 ) )>abs(& + u11 ) ) then + scale = half / max( abs( btmp( 1 ) ), abs( btmp( 2 ) ) ) + btmp( 1 ) = btmp( 1 )*scale + btmp( 2 ) = btmp( 2 )*scale + end if + x2( 2 ) = btmp( 2 ) / u22 + x2( 1 ) = btmp( 1 ) / u11 - ( u12 / u11 )*x2( 2 ) + if( xswap ) then + temp = x2( 2 ) + x2( 2 ) = x2( 1 ) + x2( 1 ) = temp + end if + x( 1, 1 ) = x2( 1 ) + if( n1==1 ) then + x( 1, 2 ) = x2( 2 ) + xnorm = abs( x( 1, 1 ) ) + abs( x( 1, 2 ) ) + else + x( 2, 1 ) = x2( 2 ) + xnorm = max( abs( x( 1, 1 ) ), abs( x( 2, 1 ) ) ) + end if + return + ! 2 by 2: + ! op[tl11 tl12]*[x11 x12] +isgn* [x11 x12]*op[tr11 tr12] = [b11 b12] + ! [tl21 tl22] [x21 x22] [x21 x22] [tr21 tr22] [b21 b22] + ! solve equivalent 4 by 4 system using complete pivoting. + ! set pivots less than smin to smin. + 50 continue + smin = max( abs( tr( 1, 1 ) ), abs( tr( 1, 2 ) ),abs( tr( 2, 1 ) ), abs( tr( 2, 2 ) ) ) + + smin = max( smin, abs( tl( 1, 1 ) ), abs( tl( 1, 2 ) ),abs( tl( 2, 1 ) ), abs( tl( 2, & + 2 ) ) ) + smin = max( eps*smin, smlnum ) + btmp( 1 ) = zero + call stdlib_scopy( 16, btmp, 0, t16, 1 ) + t16( 1, 1 ) = tl( 1, 1 ) + sgn*tr( 1, 1 ) + t16( 2, 2 ) = tl( 2, 2 ) + sgn*tr( 1, 1 ) + t16( 3, 3 ) = tl( 1, 1 ) + sgn*tr( 2, 2 ) + t16( 4, 4 ) = tl( 2, 2 ) + sgn*tr( 2, 2 ) + if( ltranl ) then + t16( 1, 2 ) = tl( 2, 1 ) + t16( 2, 1 ) = tl( 1, 2 ) + t16( 3, 4 ) = tl( 2, 1 ) + t16( 4, 3 ) = tl( 1, 2 ) + else + t16( 1, 2 ) = tl( 1, 2 ) + t16( 2, 1 ) = tl( 2, 1 ) + t16( 3, 4 ) = tl( 1, 2 ) + t16( 4, 3 ) = tl( 2, 1 ) + end if + if( ltranr ) then + t16( 1, 3 ) = sgn*tr( 1, 2 ) + t16( 2, 4 ) = sgn*tr( 1, 2 ) + t16( 3, 1 ) = sgn*tr( 2, 1 ) + t16( 4, 2 ) = sgn*tr( 2, 1 ) + else + t16( 1, 3 ) = sgn*tr( 2, 1 ) + t16( 2, 4 ) = sgn*tr( 2, 1 ) + t16( 3, 1 ) = sgn*tr( 1, 2 ) + t16( 4, 2 ) = sgn*tr( 1, 2 ) + end if + btmp( 1 ) = b( 1, 1 ) + btmp( 2 ) = b( 2, 1 ) + btmp( 3 ) = b( 1, 2 ) + btmp( 4 ) = b( 2, 2 ) + ! perform elimination + loop_100: do i = 1, 3 + xmax = zero + do ip = i, 4 + do jp = i, 4 + if( abs( t16( ip, jp ) )>=xmax ) then + xmax = abs( t16( ip, jp ) ) + ipsv = ip + jpsv = jp + end if + end do + end do + if( ipsv/=i ) then + call stdlib_sswap( 4, t16( ipsv, 1 ), 4, t16( i, 1 ), 4 ) + temp = btmp( i ) + btmp( i ) = btmp( ipsv ) + btmp( ipsv ) = temp + end if + if( jpsv/=i )call stdlib_sswap( 4, t16( 1, jpsv ), 1, t16( 1, i ), 1 ) + jpiv( i ) = jpsv + if( abs( t16( i, i ) )abs( t16( 1, 1 ) ) .or.( eight*smlnum )*abs( & + btmp( 2 ) )>abs( t16( 2, 2 ) ) .or.( eight*smlnum )*abs( btmp( 3 ) )>abs( t16( 3, 3 ) )& + .or.( eight*smlnum )*abs( btmp( 4 ) )>abs( t16( 4, 4 ) ) ) then + scale = ( one / eight ) / max( abs( btmp( 1 ) ),abs( btmp( 2 ) ), abs( btmp( 3 ) ), & + abs( btmp( 4 ) ) ) + btmp( 1 ) = btmp( 1 )*scale + btmp( 2 ) = btmp( 2 )*scale + btmp( 3 ) = btmp( 3 )*scale + btmp( 4 ) = btmp( 4 )*scale + end if + do i = 1, 4 + k = 5 - i + temp = one / t16( k, k ) + tmp( k ) = btmp( k )*temp + do j = k + 1, 4 + tmp( k ) = tmp( k ) - ( temp*t16( k, j ) )*tmp( j ) + end do + end do + do i = 1, 3 + if( jpiv( 4-i )/=4-i ) then + temp = tmp( 4-i ) + tmp( 4-i ) = tmp( jpiv( 4-i ) ) + tmp( jpiv( 4-i ) ) = temp + end if + end do + x( 1, 1 ) = tmp( 1 ) + x( 2, 1 ) = tmp( 2 ) + x( 1, 2 ) = tmp( 3 ) + x( 2, 2 ) = tmp( 4 ) + xnorm = max( abs( tmp( 1 ) )+abs( tmp( 3 ) ),abs( tmp( 2 ) )+abs( tmp( 4 ) ) ) + return + end subroutine stdlib_slasy2 + + !> SLASYF: computes a partial factorization of a real symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> SLASYF is an auxiliary routine called by SSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_slasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(sp) :: absakk, alpha, colmax, d11, d21, d22, r1, rowmax, t + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_isamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column kw-1 of w and update it + call stdlib_scopy( imax, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + call stdlib_scopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + if( k1 ) then + jmax = stdlib_isamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, abs( w( jmax, kw-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( w( imax, kw-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_scopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_scopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + if( kp>1 )call stdlib_scopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k2 ) then + ! compose the columns of the inverse of 2-by-2 pivot + ! block d in the following way to reduce the number + ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by + ! this inverse + ! d**(-1) = ( d11 d21 )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = + ! ( (-d21 ) ( d11 ) ) + ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * + ! * ( ( d22/d21 ) ( -1 ) ) = + ! ( ( -1 ) ( d11/d21 ) ) + ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) ( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = 1/d21 * t * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + ! = d21 * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / d21 + d22 = w( k-1, kw-1 ) / d21 + t = one / ( d11*d22-one ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = d21*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_sgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k, -one,a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw, one,a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_sswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + ! copy column k of a to column k of w and update it + call stdlib_scopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ), lda,w( k, 1 ), ldw, & + one, w( k, k ), 1 ) + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column k+1 of w and update it + call stdlib_scopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_scopy( n-imax+1, a( imax, imax ), 1, w( imax, k+1 ),1 ) + call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( imax, & + 1 ), ldw, one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_isamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( w( imax, k+1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_scopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_scopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + if( kp1 )call stdlib_sswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_sswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + call stdlib_scopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_sswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_slasyf + + !> SLASYF_RK: computes a partial factorization of a real symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> SLASYF_RK is an auxiliary routine called by SSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_slasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*), w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, d11, d12, d21, d22, stemp, r1, rowmax, t, & + sfmin + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = zero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_isamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_scopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = zero + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_isamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = abs( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(abs( w( imax, kw-1 ) )1 ) then + if( abs( a( k, k ) )>=sfmin ) then + r1 = one / a( k, k ) + call stdlib_sscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the superdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = one / ( d11*d22-one ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = zero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = zero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_sgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -one, a( & + 1, k+1 ), lda, w( j, kw+1 ),ldw, one, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = zero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_scopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( k, & + 1 ), ldw, one, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one,a( k, 1 ), & + lda, w( imax, 1 ), ldw,one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_isamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( w( imax, k+1 ) )=sfmin ) then + r1 = one / a( k, k ) + call stdlib_sscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the subdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k SLASYF_ROOK: computes a partial factorization of a real symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> SLASYF_ROOK is an auxiliary routine called by SSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_slasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, & + ii + real(sp) :: absakk, alpha, colmax, d11, d12, d21, d22, stemp, r1, rowmax, t, & + sfmin + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_slamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_isamax( k-1, w( 1, kw ), 1 ) + colmax = abs( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_scopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_isamax( imax-1, w( 1, kw-1 ), 1 ) + stemp = abs( w( itemp, kw-1 ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(abs( w( imax, kw-1 ) )1 ) then + if( abs( a( k, k ) )>=sfmin ) then + r1 = one / a( k, k ) + call stdlib_sscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = one / ( d11*d22-one ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_sgemv( 'NO TRANSPOSE', jj-j+1, n-k, -one,a( j, k+1 ), lda, w( jj, & + kw+1 ), ldw, one,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -one, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,one, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n + j = k + 1 + 60 continue + kstep = 1 + jp1 = 1 + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_sswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = j - 1 + if( jp1/=jj .and. kstep==2 )call stdlib_sswap( n-j+1, a( jp1, j ), lda, a( jj, j & + ), lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_scopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one, a( k, 1 ),lda, w( k, & + 1 ), ldw, one, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_sgemv( 'NO TRANSPOSE', n-k+1, k-1, -one,a( k, 1 ), & + lda, w( imax, 1 ), ldw,one, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_isamax( imax-k, w( k, k+1 ), 1 ) + rowmax = abs( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! abs( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( w( imax, k+1 ) )=sfmin ) then + r1 = one / a( k, k ) + call stdlib_sscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=zero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k=1 )call stdlib_sswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = j + 1 + if( jp1/=jj .and. kstep==2 )call stdlib_sswap( j, a( jp1, 1 ), lda, a( jj, 1 ), & + lda ) + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_slasyf_rook + + !> SLATBS: solves one of the triangular systems + !> A *x = s*b or A**T*x = s*b + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine STBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_slatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind + real(sp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( kd<0 ) then + info = -6 + else if( ldab0 ) then + cnorm( j ) = stdlib_sasum( jlen, ab( 2, j ), 1 ) + else + cnorm( j ) = zero + end if + end do + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum. + imax = stdlib_isamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum ) then + tscal = one + else + tscal = one / ( smlnum*tmax ) + call stdlib_sscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_stbsv can be used. + j = stdlib_isamax( n, x, 1 ) + xmax = abs( x( j ) ) + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + maind = kd + 1 + else + jfirst = 1 + jlast = n + jinc = 1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 50 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! m(j) = g(j-1) / abs(a(j,j)) + tjj = abs( ab( maind, j ) ) + xbnd = min( xbnd, min( one, tjj )*grow ) + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + maind = kd + 1 + else + jfirst = n + jlast = 1 + jinc = -1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( ab( maind, j ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_stbsv( uplo, trans, diag, n, kd, ab, ldab, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_sscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + loop_100: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 95 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 95 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_sscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - + ! x(j)* a(max(1,j-kd):j-1,j) + jlen = min( kd, j-1 ) + call stdlib_saxpy( jlen, -x( j )*tscal,ab( kd+1-jlen, j ), 1, x( j-jlen & + ), 1 ) + i = stdlib_isamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + else if( j0 )call stdlib_saxpy( jlen, -x( j )*tscal, ab( 2, j ), 1,x( j+1 ),& + 1 ) + i = j + stdlib_isamax( n-j, x( j+1 ), 1 ) + xmax = abs( x( i ) ) + end if + end do loop_100 + else + ! solve a**t * x = b + loop_140: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( rec0 )sumj = stdlib_sdot( jlen, ab( 2, j ), 1, x( j+1 ), 1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + sumj = sumj + ( ab( kd+i-jlen, j )*uscal )*x( j-jlen-1+i ) + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + sumj = sumj + ( ab( i+1, j )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==tscal ) then + ! compute x(j) := ( x(j) - sumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - sumj + xj = abs( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 135 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 135 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + end do loop_140 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_slatbs + + !> SLATPS: solves one of the triangular systems + !> A *x = s*b or A**T*x = s*b + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, x and b are n-element vectors, and s is a scaling + !> factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_slatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen + real(sp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLATPS', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine machine dependent parameters to control overflow. + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) / stdlib_slamch( 'PRECISION' ) + bignum = one / smlnum + scale = one + if( stdlib_lsame( normin, 'N' ) ) then + ! compute the 1-norm of each column, not including the diagonal. + if( upper ) then + ! a is upper triangular. + ip = 1 + do j = 1, n + cnorm( j ) = stdlib_sasum( j-1, ap( ip ), 1 ) + ip = ip + j + end do + else + ! a is lower triangular. + ip = 1 + do j = 1, n - 1 + cnorm( j ) = stdlib_sasum( n-j, ap( ip+1 ), 1 ) + ip = ip + n - j + 1 + end do + cnorm( n ) = zero + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum. + imax = stdlib_isamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum ) then + tscal = one + else + tscal = one / ( smlnum*tmax ) + call stdlib_sscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_stpsv can be used. + j = stdlib_isamax( n, x, 1 ) + xmax = abs( x( j ) ) + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + else + jfirst = 1 + jlast = n + jinc = 1 + end if + if( tscal/=one ) then + grow = zero + go to 50 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = n + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! m(j) = g(j-1) / abs(a(j,j)) + tjj = abs( ap( ip ) ) + xbnd = min( xbnd, min( one, tjj )*grow ) + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + ip = ip + jinc*jlen + jlen = jlen - 1 + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( ap( ip ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_stpsv( uplo, trans, diag, n, ap, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_sscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + ip = jfirst*( jfirst+1 ) / 2 + loop_100: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + if( tscal==one )go to 95 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 95 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_sscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_saxpy( j-1, -x( j )*tscal, ap( ip-j+1 ), 1, x,1 ) + i = stdlib_isamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + ip = ip - j + else + if( jj + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 135 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_140 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_slatps + + !> SLATRS: solves one of the triangular systems + !> A *x = s*b or A**T*x = s*b + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, x and b are + !> n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine STRSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_slatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: cnorm(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast + real(sp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, & + xmax + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 50 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 50 continue + else + ! compute the growth in a**t * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 80 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = one / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + tjj = abs( a( j, j ) ) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, one / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 80 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 80 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_strsv( uplo, trans, diag, n, a, lda, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = bignum / xmax + call stdlib_sscal( n, scale, x, 1 ) + xmax = bignum + end if + if( notran ) then + ! solve a * x = b + loop_100: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = abs( x( j ) ) + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 95 + end if + tjj = abs( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + xj = abs( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 95 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_sscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_saxpy( j-1, -x( j )*tscal, a( 1, j ), 1, x,1 ) + i = stdlib_isamax( j-1, x, 1 ) + xmax = abs( x( i ) ) + end if + else + if( jj + xj = abs( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + end if + tjj = abs( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = uscal / tjjs + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = x( j ) / tjjs + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = x( j ) / tjjs + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a**t*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 135 continue + else + ! compute x(j) := x(j) / a(j,j) - sumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = x( j ) / tjjs - sumj + end if + xmax = max( xmax, abs( x( j ) ) ) + end do loop_140 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_sscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_slatrs + + !> SLAUU2: computes the product U * U**T or L**T * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the unblocked form of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_slauu2( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SLAUUM: computes the product U * U**T or L**T * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_slauum( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ib, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_slauu2( uplo, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute the product u * u**t. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',i-1, ib, one, a( & + i, i ), lda, a( 1, i ),lda ) + call stdlib_slauu2( 'UPPER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', i-1, ib,n-i-ib+1, one, a( & + 1, i+ib ), lda,a( i, i+ib ), lda, one, a( 1, i ), lda ) + call stdlib_ssyrk( 'UPPER', 'NO TRANSPOSE', ib, n-i-ib+1,one, a( i, i+ib ),& + lda, one, a( i, i ),lda ) + end if + end do + else + ! compute the product l**t * l. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_strmm( 'LEFT', 'LOWER', 'TRANSPOSE', 'NON-UNIT', ib,i-1, one, a( & + i, i ), lda, a( i, 1 ), lda ) + call stdlib_slauu2( 'LOWER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', ib, i-1,n-i-ib+1, one, a( & + i+ib, i ), lda,a( i+ib, 1 ), lda, one, a( i, 1 ), lda ) + call stdlib_ssyrk( 'LOWER', 'TRANSPOSE', ib, n-i-ib+1, one,a( i+ib, i ), & + lda, one, a( i, i ), lda ) + end if + end do + end if + end if + return + end subroutine stdlib_slauum + + !> SORBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + + pure subroutine stdlib_sorbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: alphasq = 0.01_sp + real(sp), parameter :: realone = 1.0_sp + real(sp), parameter :: realzero = 0.0_sp + + + ! Local Scalars + integer(ilp) :: i + real(sp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORBDB6', -info ) + return + end if + ! first, project x onto the orthogonal complement of q's column + ! space + scl1 = realzero + ssq1 = realone + call stdlib_slassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_slassq( m2, x2, incx2, scl2, ssq2 ) + normsq1 = scl1**2*ssq1 + scl2**2*ssq2 + if( m1 == 0 ) then + do i = 1, n + work(i) = zero + end do + else + call stdlib_sgemv( 'C', m1, n, one, q1, ldq1, x1, incx1, zero, work,1 ) + end if + call stdlib_sgemv( 'C', m2, n, one, q2, ldq2, x2, incx2, one, work, 1 ) + call stdlib_sgemv( 'N', m1, n, negone, q1, ldq1, work, 1, one, x1,incx1 ) + call stdlib_sgemv( 'N', m2, n, negone, q2, ldq2, work, 1, one, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_slassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_slassq( m2, x2, incx2, scl2, ssq2 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if projection is sufficiently large in norm, then stop. + ! if projection is zero, then stop. + ! otherwise, project again. + if( normsq2 >= alphasq*normsq1 ) then + return + end if + if( normsq2 == zero ) then + return + end if + normsq1 = normsq2 + do i = 1, n + work(i) = zero + end do + if( m1 == 0 ) then + do i = 1, n + work(i) = zero + end do + else + call stdlib_sgemv( 'C', m1, n, one, q1, ldq1, x1, incx1, zero, work,1 ) + end if + call stdlib_sgemv( 'C', m2, n, one, q2, ldq2, x2, incx2, one, work, 1 ) + call stdlib_sgemv( 'N', m1, n, negone, q1, ldq1, work, 1, one, x1,incx1 ) + call stdlib_sgemv( 'N', m2, n, negone, q2, ldq2, work, 1, one, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_slassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_slassq( m1, x1, incx1, scl1, ssq1 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if second projection is sufficiently large in norm, then do + ! nothing more. alternatively, if it shrunk significantly, then + ! truncate it to zero. + if( normsq2 < alphasq*normsq1 ) then + do i = 1, m1 + x1(i) = zero + end do + do i = 1, m2 + x2(i) = zero + end do + end if + return + end subroutine stdlib_sorbdb6 + + !> SORG2L: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGEQLF. + + pure subroutine stdlib_sorg2l( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda SORG2R: generates an m by n real matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGEQRF. + + pure subroutine stdlib_sorg2r( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda SORGL2: generates an m by n real matrix Q with orthonormal rows, + !> which is defined as the first m rows of a product of k elementary + !> reflectors of order n + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGELQF. + + pure subroutine stdlib_sorgl2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldak .and. j<=m )a( j, j ) = one + end do + end if + do i = k, 1, -1 + ! apply h(i) to a(i:m,i:n) from the right + if( i SORGLQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGELQF. + + pure subroutine stdlib_sorglq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'SORGLQ', ' ', m, n, k, -1 ) + lwkopt = max( 1, m )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=m ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_slarft( 'FORWARD', 'ROWWISE', n-i+1, ib, a( i, i ),lda, tau( i ), & + work, ldwork ) + ! apply h**t to a(i+ib:m,i:n) from the right + call stdlib_slarfb( 'RIGHT', 'TRANSPOSE', 'FORWARD', 'ROWWISE',m-i-ib+1, n-i+& + 1, ib, a( i, i ), lda, work,ldwork, a( i+ib, i ), lda, work( ib+1 ),ldwork ) + + end if + ! apply h**t to columns i:n of current block + call stdlib_sorgl2( ib, n-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set columns 1:i-1 of current block to zero + do j = 1, i - 1 + do l = i, i + ib - 1 + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_sorglq + + !> SORGQL: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGEQLF. + + pure subroutine stdlib_sorgql( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + if( n-k+i>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_slarfb( 'LEFT', 'NO TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-& + 1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + ! apply h to rows 1:m-k+i+ib-1 of current block + call stdlib_sorg2l( m-k+i+ib-1, ib, ib, a( 1, n-k+i ), lda,tau( i ), work, iinfo & + ) + ! set rows m-k+i+ib:m of current block to zero + do j = n - k + i, n - k + i + ib - 1 + do l = m - k + i + ib, m + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_sorgql + + !> SORGQR: generates an M-by-N real matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGEQRF. + + pure subroutine stdlib_sorgqr( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'SORGQR', ' ', m, n, k, -1 ) + lwkopt = max( 1, n )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=n ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i & + ), work, ldwork ) + ! apply h to a(i:m,i+ib:n) from the left + call stdlib_slarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-& + i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), & + ldwork ) + end if + ! apply h to rows i:m of current block + call stdlib_sorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set rows 1:i-1 of current block to zero + do j = i, i + ib - 1 + do l = 1, i - 1 + a( l, j ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_sorgqr + + !> SORGR2: generates an m by n real matrix Q with orthonormal rows, + !> which is defined as the last m rows of a product of k elementary + !> reflectors of order n + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGERQF. + + pure subroutine stdlib_sorgr2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldan-m .and. j<=n-k )a( m-n+j, j ) = one + end do + end if + do i = 1, k + ii = m - k + i + ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right + a( ii, n-m+ii ) = one + call stdlib_slarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1 ), lda, tau( i ),a, lda, work ) + + call stdlib_sscal( n-m+ii-1, -tau( i ), a( ii, 1 ), lda ) + a( ii, n-m+ii ) = one - tau( i ) + ! set a(m-k+i,n-k+i+1:n) to zero + do l = n - m + ii + 1, n + a( ii, l ) = zero + end do + end do + return + end subroutine stdlib_sorgr2 + + !> SORGRQ: generates an M-by-N real matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGERQF. + + pure subroutine stdlib_sorgrq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + ii = m - k + i + if( ii>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_slarfb( 'RIGHT', 'TRANSPOSE', 'BACKWARD', 'ROWWISE',ii-1, n-k+i+& + ib-1, ib, a( ii, 1 ), lda, work,ldwork, a, lda, work( ib+1 ), ldwork ) + end if + ! apply h**t to columns 1:n-k+i+ib-1 of current block + call stdlib_sorgr2( ib, n-k+i+ib-1, ib, a( ii, 1 ), lda, tau( i ),work, iinfo ) + + ! set columns n-k+i+ib:n of current block to zero + do l = n - k + i + ib, n + do j = ii, ii + ib - 1 + a( j, l ) = zero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_sorgrq + + !> SORGTSQR_ROW: generates an M-by-N real matrix Q_out with + !> orthonormal columns from the output of SLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by SLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of SLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine SLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which SLATSQR generates the output blocks. + + pure subroutine stdlib_sorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, & + num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1 + ! Local Arrays + real(sp) :: dummy(1,1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m=m, then the loop is never executed. + if ( mb=m, then we have only one row block of a of size m + ! and we work on the entire matrix a. + mb1 = min( mb, m ) + ! apply column blocks of h in the top row block from right to left. + ! kb is the column index of the current block reflector in + ! the matrices t and v. + do kb = kb_last, 1, -nblocal + ! determine the size of the current column block knb in + ! the matrices t and v. + knb = min( nblocal, n - kb + 1 ) + if( mb1-kb-knb+1==0 ) then + ! in stdlib_slarfb_gett parameters, when m=0, then the matrix b + ! does not exist, hence we need to pass a dummy array + ! reference dummy(1,1) to b with lddummy=1. + call stdlib_slarfb_gett( 'N', 0, n-kb+1, knb,t( 1, kb ), ldt, a( kb, kb ), lda,& + dummy( 1, 1 ), 1, work, knb ) + else + call stdlib_slarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1, kb ), ldt, a( kb, & + kb ), lda,a( kb+knb, kb), lda, work, knb ) + end if + end do + work( 1 ) = real( lworkopt,KIND=sp) + return + end subroutine stdlib_sorgtsqr_row + + + pure subroutine stdlib_sorm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: m, n, n1, n2, ldq, ldc, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(in) :: q(ldq,*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q; + ! nw is the minimum dimension of work. + if( left ) then + nq = m + else + nq = n + end if + nw = nq + if( n1==0 .or. n2==0 ) nw = 1 + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( n1<0 .or. n1+n2/=nq ) then + info = -5 + else if( n2<0 ) then + info = -6 + else if( ldq SORM2L: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T * C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_sorm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda SORM2R: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda SORML2: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_sorml2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda SORMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_sormlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_sorml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_slarft( 'FORWARD', 'ROWWISE', nq-i+1, ib, a( i, i ),lda, tau( i ), & + work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_slarfb( side, transt, 'FORWARD', 'ROWWISE', mi, ni, ib,a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormlq + + !> SORMQL: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_sormql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_sorm2l( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'COLUMNWISE', nq-k+i+ib-1, ib,a( 1, i ), lda, & + tau( i ), work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**t is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**t + call stdlib_slarfb( side, trans, 'BACKWARD', 'COLUMNWISE', mi, ni,ib, a( 1, i ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormql + + !> SORMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_slarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),& + work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_slarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormqr + + !> SORMR2: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'T', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'T', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGERQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_sormr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda SORMR3: overwrites the general real m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**T* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**T if SIDE = 'R' and TRANS = 'C', + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by STZRZF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_sormr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*), tau(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda SORMRQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by SGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_sormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_sormr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1 ), lda, tau( & + i ), work( iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**t is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**t + call stdlib_slarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1 ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormrq + + !> SORMRZ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by STZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_sormrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, & + nbmin, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_sormr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, iinfo ) + + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + ja = m - l + 1 + else + mi = m + ic = 1 + ja = n - l + 1 + end if + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarzt( 'BACKWARD', 'ROWWISE', l, ib, a( i, ja ), lda,tau( i ), work(& + iwt ), ldt ) + if( left ) then + ! h or h**t is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**t is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**t + call stdlib_slarzb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, l, a( i, ja )& + , lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormrz + + !> SPBEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_spbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab SPBSTF: computes a split Cholesky factorization of a real + !> symmetric positive definite band matrix A. + !> This routine is designed to be used in conjunction with SSBGST. + !> The factorization has the form A = S**T*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + + pure subroutine stdlib_spbstf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, km, m + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_sscal( km, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_ssyr( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + end if + end do + else + ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m). + do j = n, m + 1, -1 + ! compute s(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 50 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( j-1, kd ) + ! compute elements j-km:j-1 of the j-th row and update the + ! trailing submatrix within the band. + call stdlib_sscal( km, one / ajj, ab( km+1, j-km ), kld ) + call stdlib_ssyr( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1, j-km ), kld ) + + end do + ! factorize the updated submatrix a(1:m,1:m) as u**t*u. + do j = 1, m + ! compute s(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 50 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( kd, m-j ) + ! compute elements j+1:j+km of the j-th column and update the + ! trailing submatrix within the band. + if( km>0 ) then + call stdlib_sscal( km, one / ajj, ab( 2, j ), 1 ) + call stdlib_ssyr( 'LOWER', km, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 50 continue + info = j + return + end subroutine stdlib_spbstf + + !> SPBTF2: computes the Cholesky factorization of a real symmetric + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**T * U , if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix, U**T is the transpose of U, and + !> L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_spbtf2( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, kn + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_sscal( kn, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_ssyr( 'UPPER', kn, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + end if + end do + else + ! compute the cholesky factorization a = l*l**t. + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = ab( 1, j ) + if( ajj<=zero )go to 30 + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + ! compute elements j+1:j+kn of column j and update the + ! trailing submatrix within the band. + kn = min( kd, n-j ) + if( kn>0 ) then + call stdlib_sscal( kn, one / ajj, ab( 2, j ), 1 ) + call stdlib_ssyr( 'LOWER', kn, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 30 continue + info = j + return + end subroutine stdlib_spbtf2 + + !> SPBTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite band matrix A using the Cholesky factorization + !> A = U**T*U or A = L*L**T computed by SPBTRF. + + pure subroutine stdlib_spbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab SPOEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_spoequ( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( lda SPOEQUB: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from SPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_spoequb( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: smin, base, tmp + ! Intrinsic Functions + intrinsic :: max,min,sqrt,log,int + ! Executable Statements + ! test the input parameters. + ! positive definite only performs 1 pass of equilibration. + info = 0 + if( n<0 ) then + info = -1 + else if( lda SPOTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite matrix A using the Cholesky factorization + !> A = U**T*U or A = L*L**T computed by SPOTRF. + + pure subroutine stdlib_spotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda SPPEQU: computes row and column scalings intended to equilibrate a + !> symmetric positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_sppequ( uplo, n, ap, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: amax, scond + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, jj + real(sp) :: smin + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SPPEQU', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + scond = one + amax = zero + return + end if + ! initialize smin and amax. + s( 1 ) = ap( 1 ) + smin = s( 1 ) + amax = s( 1 ) + if( upper ) then + ! uplo = 'u': upper triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + i + s( i ) = ap( jj ) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + else + ! uplo = 'l': lower triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + n - i + 2 + s( i ) = ap( jj ) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + end if + if( smin<=zero ) then + ! find the first non-positive diagonal element and return. + do i = 1, n + if( s( i )<=zero ) then + info = i + return + end if + end do + else + ! set the scale factors to the reciprocals + ! of the diagonal elements. + do i = 1, n + s( i ) = one / sqrt( s( i ) ) + end do + ! compute scond = min(s(i)) / max(s(i)) + scond = sqrt( smin ) / sqrt( amax ) + end if + return + end subroutine stdlib_sppequ + + !> SPPTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_spptrf( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SPPTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( upper ) then + ! compute the cholesky factorization a = u**t*u. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + ! compute elements 1:j-1 of column j. + if( j>1 )call stdlib_stpsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', j-1, ap,ap( jc ), & + 1 ) + ! compute u(j,j) and test for non-positive-definiteness. + ajj = ap( jj ) - stdlib_sdot( j-1, ap( jc ), 1, ap( jc ), 1 ) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ap( jj ) = sqrt( ajj ) + end do + else + ! compute the cholesky factorization a = l*l**t. + jj = 1 + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = ap( jj ) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ap( jj ) = ajj + ! compute elements j+1:n of column j and update the trailing + ! submatrix. + if( j SPPTRS: solves a system of linear equations A*X = B with a symmetric + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**T*U or A = L*L**T computed by SPPTRF. + + pure subroutine stdlib_spptrs( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb SPTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a real symmetric positive definite tridiagonal matrix + !> using the factorization A = L*D*L**T or A = U**T*D*U computed by + !> SPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_sptcon( n, d, e, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(in) :: d(*), e(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ix + real(sp) :: ainvnm + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input arguments. + info = 0 + if( n<0 ) then + info = -1 + else if( anorm SPTTRF: computes the L*D*L**T factorization of a real symmetric + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**T*D*U. + + pure subroutine stdlib_spttrf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i4 + real(sp) :: ei + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'SPTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! compute the l*d*l**t (or u**t*d*u) factorization of a. + i4 = mod( n-1, 4 ) + do i = 1, i4 + if( d( i )<=zero ) then + info = i + go to 30 + end if + ei = e( i ) + e( i ) = ei / d( i ) + d( i+1 ) = d( i+1 ) - e( i )*ei + end do + loop_20: do i = i4 + 1, n - 4, 4 + ! drop out of the loop if d(i) <= 0: the matrix is not positive + ! definite. + if( d( i )<=zero ) then + info = i + go to 30 + end if + ! solve for e(i) and d(i+1). + ei = e( i ) + e( i ) = ei / d( i ) + d( i+1 ) = d( i+1 ) - e( i )*ei + if( d( i+1 )<=zero ) then + info = i + 1 + go to 30 + end if + ! solve for e(i+1) and d(i+2). + ei = e( i+1 ) + e( i+1 ) = ei / d( i+1 ) + d( i+2 ) = d( i+2 ) - e( i+1 )*ei + if( d( i+2 )<=zero ) then + info = i + 2 + go to 30 + end if + ! solve for e(i+2) and d(i+3). + ei = e( i+2 ) + e( i+2 ) = ei / d( i+2 ) + d( i+3 ) = d( i+3 ) - e( i+2 )*ei + if( d( i+3 )<=zero ) then + info = i + 3 + go to 30 + end if + ! solve for e(i+3) and d(i+4). + ei = e( i+3 ) + e( i+3 ) = ei / d( i+3 ) + d( i+4 ) = d( i+4 ) - e( i+3 )*ei + end do loop_20 + ! check d(n) for positive definiteness. + if( d( n )<=zero )info = n + 30 continue + return + end subroutine stdlib_spttrf + + !> SPTTS2: solves a tridiagonal system of the form + !> A * X = B + !> using the L*D*L**T factorization of A computed by SPTTRF. D is a + !> diagonal matrix specified in the vector D, L is a unit bidiagonal + !> matrix whose subdiagonal is specified in the vector E, and X and B + !> are N by NRHS matrices. + + pure subroutine stdlib_sptts2( n, nrhs, d, e, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*), e(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + if( n==1 )call stdlib_sscal( nrhs, 1. / d( 1 ), b, ldb ) + return + end if + ! solve a * x = b using the factorization a = l*d*l**t, + ! overwriting each right hand side vector with its solution. + do j = 1, nrhs + ! solve l * x = b. + do i = 2, n + b( i, j ) = b( i, j ) - b( i-1, j )*e( i-1 ) + end do + ! solve d * l**t * x = b. + b( n, j ) = b( n, j ) / d( n ) + do i = n - 1, 1, -1 + b( i, j ) = b( i, j ) / d( i ) - b( i+1, j )*e( i ) + end do + end do + return + end subroutine stdlib_sptts2 + + !> SRSCL: multiplies an n-element real vector x by the real scalar 1/a. + !> This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + + pure subroutine stdlib_srscl( n, sa, sx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(in) :: sa + ! Array Arguments + real(sp), intent(inout) :: sx(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + real(sp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 )return + ! get machine parameters + smlnum = stdlib_slamch( 'S' ) + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! initialize the denominator to sa and the numerator to 1. + cden = sa + cnum = one + 10 continue + cden1 = cden*smlnum + cnum1 = cnum / bignum + if( abs( cden1 )>abs( cnum ) .and. cnum/=zero ) then + ! pre-multiply x by smlnum if cden is large compared to cnum. + mul = smlnum + done = .false. + cden = cden1 + else if( abs( cnum1 )>abs( cden ) ) then + ! pre-multiply x by bignum if cden is small compared to cnum. + mul = bignum + done = .false. + cnum = cnum1 + else + ! multiply x by cnum / cden and return. + mul = cnum / cden + done = .true. + end if + ! scale the vector x by mul + call stdlib_sscal( n, mul, sx, incx ) + if( .not.done )go to 10 + return + end subroutine stdlib_srscl + + !> SSBGST: reduces a real symmetric-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**T*S by SPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**T*A*X, where + !> X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the + !> bandwidth of A. + + pure subroutine stdlib_ssbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldx, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(in) :: bb(ldbb,*) + real(sp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: update, upper, wantx + integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, & + nrt, nx + real(sp) :: bii, ra, ra1, t + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + wantx = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + ka1 = ka + 1 + kb1 = kb + 1 + info = 0 + if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldabn-1 )go to 480 + end if + if( upper ) then + ! transform a, working with the upper triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( kb1, i ) + do j = i, i1 + ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii + end do + do j = max( 1, i-ka ), i + ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*ab( k-i+ka1, i ) -bb(& + k-i+kb1, i )*ab( j-i+ka1, i ) +ab( ka1, i )*bb( j-i+kb1, i )*bb( k-i+kb1, & + i ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( k-i+kb1, i )*ab( j-i+ka1, i ) + + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j ) + + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_sscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_sger( n-m, kbt, -one, x( m+1, i ), 1,bb( kb1-kbt, i ), & + 1, x( m+1, i-kbt ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+ka1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_130: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i,i-k+ka+1) + call stdlib_slartg( ab( k+1, i-k+ka ), ra1,work( n+i-k+ka-m ), work( i-k+& + ka-m ),ra ) + ! create nonzero element a(i-k,i-k+ka+1) outside the + ! band and store it in work(i-k) + t = -bb( kb1-k, i )*ra1 + work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( 1, i-k+ka ) + + ab( 1, i-k+ka ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( 1, i-k+ka ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( 1, j+1 ) + ab( 1, j+1 ) = work( n+j-m )*ab( 1, j+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_slargv( nrt, ab( 1, j2t ), inca, work( j2t-m ), ka1,work( & + n+j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(& + n+j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + work( n+j2-m ),work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca,work( n+j2-m ), work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_srot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j-m ), & + work( j-m ) ) + end do + end if + end do loop_130 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kb1-kbt, i )*ra1 + end if + end if + loop_170: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), & + inca, work( n+j2-ka ),work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + work( n+j ) = work( n+j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( 1, j+1 ) + ab( 1, j+1 ) = work( n+j )*ab( 1, j+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_slargv( nr, ab( 1, j2 ), inca, work( j2 ), ka1,work( n+j2 ), ka1 ) + + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(& + n+j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + work( n+j2 ),work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, work( n+j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_srot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j ), work( & + j ) ) + end do + end if + end do loop_210 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca,work( n+j2-m ), work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, i - kb + 2*ka + 1, -1 + work( n+j-m ) = work( n+j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( 1, i ) + do j = i, i1 + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do j = max( 1, i-ka ), i + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*ab( i-k+1, k ) -bb( i-k+1, & + k )*ab( i-j+1, j ) +ab( 1, i )*bb( i-j+1, j )*bb( i-k+1, k ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-k+1, k )*ab( i-j+1, j ) + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_sscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_sger( n-m, kbt, -one, x( m+1, i ), 1,bb( kbt+1, i-kbt )& + , ldbb-1,x( m+1, i-kbt ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_360: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i-k+ka+1,i) + call stdlib_slartg( ab( ka1-k, i ), ra1, work( n+i-k+ka-m ),work( i-k+ka-m & + ), ra ) + ! create nonzero element a(i-k+ka+1,i-k) outside the + ! band and store it in work(i-k) + t = -bb( k+1, i-k )*ra1 + work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( ka1, i-k ) + ab( ka1, i-k ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( ka1, i-k ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = work( n+j-m )*ab( ka1, j-ka+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_slargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, & + work( n+j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( & + n+j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, work( n+& + j2-m ), work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_srot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j-m ), & + work( j-m ) ) + end do + end if + end do loop_360 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1 + end if + end if + loop_400: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-& + ka+1 ), inca,work( n+j2-ka ), work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + work( n+j ) = work( n+j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = work( n+j )*ab( ka1, j-ka+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_slargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,work( n+j2 ), & + ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( & + n+j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, work( n+& + j2 ), work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_srot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,work( n+j ), work( & + j ) ) + end do + end if + end do loop_440 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, work( n+j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, i - kb + 2*ka + 1, -1 + work( n+j-m ) = work( n+j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + end if + go to 10 + 480 continue + ! **************************** phase 2 ***************************** + ! the logical structure of this phase is: + ! update = .true. + ! do i = 1, m + ! use s(i) to update a and create a new bulge + ! apply rotations to push all bulges ka positions upward + ! end do + ! update = .false. + ! do i = m - ka - 1, 2, -1 + ! apply rotations to push all bulges ka positions upward + ! end do + ! to avoid duplicating code, the two loops are merged. + update = .true. + i = 0 + 490 continue + if( update ) then + i = i + 1 + kbt = min( kb, m-i ) + i0 = i + 1 + i1 = max( 1, i-ka ) + i2 = i + kbt - ka1 + if( i>m ) then + update = .false. + i = i - 1 + i0 = m + 1 + if( ka==0 )return + go to 490 + end if + else + i = i - ka + if( i<2 )return + end if + if( i0 )call stdlib_sger( nx, kbt, -one, x( 1, i ), 1, bb( kb, i+1 ),ldbb-& + 1, x( 1, i+1 ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+ka1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_610: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_slargv( nrt, ab( 1, j1+ka ), inca, work( j1 ), ka1,work( & + n+j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + work( n+j1 ),work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + work( n+j1 ),work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+j1t ),work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_srot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+j ), work( j ) ) + + end do + end if + end do loop_610 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1 + end if + end if + loop_650: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),& + inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + work( n+m-kb+j ) = work( n+m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j-1,j+ka) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( 1, j+ka-1 ) + ab( 1, j+ka-1 ) = work( n+m-kb+j )*ab( 1, j+ka-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_650 + loop_690: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_slargv( nr, ab( 1, j1+ka ), inca, work( m-kb+j1 ),ka1, work( n+m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca,& + work( n+m-kb+j1 ), work( m-kb+j1 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + work( n+m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_srot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_690 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + work( n+j1t ),work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, min( i+kb, m ) - 2*ka - 1 + work( n+j ) = work( n+j+ka ) + work( j ) = work( j+ka ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**t * a * inv(s(i)) + bii = bb( 1, i ) + do j = i1, i + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do j = i, min( n, i+ka ) + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do k = i + 1, i + kbt + do j = k, i + kbt + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*ab( k-i+1, i ) -bb( k-i+1, & + i )*ab( j-i+1, i ) +ab( 1, i )*bb( j-i+1, i )*bb( k-i+1, i ) + end do + do j = i + kbt + 1, min( n, i+ka ) + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( k-i+1, i )*ab( j-i+1, i ) + end do + end do + do j = i1, i + do k = i + 1, min( j+ka, i+kbt ) + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_sscal( nx, one / bii, x( 1, i ), 1 ) + if( kbt>0 )call stdlib_sger( nx, kbt, -one, x( 1, i ), 1, bb( 2, i ), 1,x( 1, & + i+1 ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_840: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_slargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,work( n+& + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+& + j1 ), work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, work( & + n+j1 ),work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_srot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+j ), work( j ) ) + + end do + end if + end do loop_840 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1 + end if + end if + loop_880: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, & + j1t+l-1 ), inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + work( n+m-kb+j ) = work( n+m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j+ka,j-1) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 ) + ab( ka1, j-1 ) = work( n+m-kb+j )*ab( ka1, j-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_880 + loop_920: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_slargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, work( n+m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_slartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+& + m-kb+j1 ), work( m-kb+j1 ),ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_slar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, work( & + n+m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_srot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,work( n+m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_920 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, min( i+kb, m ) - 2*ka - 1 + work( n+j ) = work( n+j+ka ) + work( j ) = work( j+ka ) + end do + end if + end if + go to 490 + end subroutine stdlib_ssbgst + + !> SSBTRD: reduces a real symmetric band matrix A to symmetric + !> tridiagonal form T by an orthogonal similarity transformation: + !> Q**T * A * Q = T. + + pure subroutine stdlib_ssbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldq, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*), q(ldq,*) + real(sp), intent(out) :: d(*), e(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: initq, upper, wantq + integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, & + jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + initq = stdlib_lsame( vect, 'V' ) + wantq = initq .or. stdlib_lsame( vect, 'U' ) + upper = stdlib_lsame( uplo, 'U' ) + kd1 = kd + 1 + kdm1 = kd - 1 + incx = ldab - 1 + iqend = 1 + info = 0 + if( .not.wantq .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldab1 ) then + ! reduce to tridiagonal form, working with upper triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + loop_90: do i = 1, n - 2 + ! reduce i-th row of matrix to tridiagonal form + loop_80: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_slargv( nr, ab( 1, j1-1 ), inca, work( j1 ),kd1, d( j1 ), & + kd1 ) + ! apply rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_slartv or stdlib_srot is used + if( nr>=2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_slartv( nr, ab( l+1, j1-1 ), inca,ab( l, j1 ), inca, & + d( j1 ),work( j1 ), kd1 ) + end do + else + jend = j1 + ( nr-1 )*kd1 + do jinc = j1, jend, kd1 + call stdlib_srot( kdm1, ab( 2, jinc-1 ), 1,ab( 1, jinc ), 1, d( & + jinc ),work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+k-1) + ! within the band + call stdlib_slartg( ab( kd-k+3, i+k-2 ),ab( kd-k+2, i+k-1 ), d( i+k-& + 1 ),work( i+k-1 ), temp ) + ab( kd-k+3, i+k-2 ) = temp + ! apply rotation from the right + call stdlib_srot( k-3, ab( kd-k+4, i+k-2 ), 1,ab( kd-k+3, i+k-1 ), 1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_slar2v( nr, ab( kd1, j1-1 ), ab( kd1, j1 ),ab( kd, & + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the left + if( nr>0 ) then + if( 2*kd-1n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_slartv( nrt, ab( kd-l, j1+l ), inca,ab( kd-& + l+1, j1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do jin = j1, j1end, kd1 + call stdlib_srot( kd-1, ab( kd-1, jin+1 ), incx,ab( kd, jin+1 )& + , incx,d( jin ), work( jin ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_srot( lend, ab( kd-1, last+1 ), incx,ab( kd, & + last+1 ), incx, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_srot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_srot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j-1,j+kd) outside the band + ! and store it in work + work( j+kd ) = work( j )*ab( 1, j+kd ) + ab( 1, j+kd ) = d( j )*ab( 1, j+kd ) + end do + end do loop_80 + end do loop_90 + end if + if( kd>0 ) then + ! copy off-diagonal elements to e + do i = 1, n - 1 + e( i ) = ab( kd, i+1 ) + end do + else + ! set e to zero if original matrix was diagonal + do i = 1, n - 1 + e( i ) = zero + end do + end if + ! copy diagonal elements to d + do i = 1, n + d( i ) = ab( kd1, i ) + end do + else + if( kd>1 ) then + ! reduce to tridiagonal form, working with lower triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + loop_210: do i = 1, n - 2 + ! reduce i-th column of matrix to tridiagonal form + loop_200: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_slargv( nr, ab( kd1, j1-kd1 ), inca,work( j1 ), kd1, d( j1 )& + , kd1 ) + ! apply plane rotations from one side + ! dependent on the the number of diagonals either + ! stdlib_slartv or stdlib_srot is used + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_slartv( nr, ab( kd1-l, j1-kd1+l ), inca,ab( kd1-l+1, & + j1-kd1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + jend = j1 + kd1*( nr-1 ) + do jinc = j1, jend, kd1 + call stdlib_srot( kdm1, ab( kd, jinc-kd ), incx,ab( kd1, jinc-kd )& + , incx,d( jinc ), work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i+k-1,i) + ! within the band + call stdlib_slartg( ab( k-1, i ), ab( k, i ),d( i+k-1 ), work( i+k-1 & + ), temp ) + ab( k-1, i ) = temp + ! apply rotation from the left + call stdlib_srot( k-3, ab( k-2, i+1 ), ldab-1,ab( k-1, i+1 ), ldab-1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_slar2v( nr, ab( 1, j1-1 ), ab( 1, j1 ),ab( 2, j1-1 ),& + inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_slartv or stdlib_srot is used + if( nr>0 ) then + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + if( j2+l>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_slartv( nrt, ab( l+2, j1-1 ), inca,ab( l+1,& + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do j1inc = j1, j1end, kd1 + call stdlib_srot( kdm1, ab( 3, j1inc-1 ), 1,ab( 2, j1inc ), 1, & + d( j1inc ),work( j1inc ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_srot( lend, ab( 3, last-1 ), 1,ab( 2, last ),& + 1, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_srot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_srot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j+kd,j-1) outside the + ! band and store it in work + work( j+kd ) = work( j )*ab( kd1, j ) + ab( kd1, j ) = d( j )*ab( kd1, j ) + end do + end do loop_200 + end do loop_210 + end if + if( kd>0 ) then + ! copy off-diagonal elements to e + do i = 1, n - 1 + e( i ) = ab( 2, i ) + end do + else + ! set e to zero if original matrix was diagonal + do i = 1, n - 1 + e( i ) = zero + end do + end if + ! copy diagonal elements to d + do i = 1, n + d( i ) = ab( 1, i ) + end do + end if + return + end subroutine stdlib_ssbtrd + + !> Level 3 BLAS like routine for C in RFP Format. + !> SSFRK: performs one of the symmetric rank--k operations + !> C := alpha*A*A**T + beta*C, + !> or + !> C := alpha*A**T*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n symmetric + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + + pure subroutine stdlib_ssfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, n + character, intent(in) :: trans, transr, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: c(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, normaltransr, nisodd, notrans + integer(ilp) :: info, nrowa, j, nk, n1, n2 + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( notrans ) then + nrowa = n + else + nrowa = k + end if + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( lda SSPGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by SPPTRF. + + pure subroutine stdlib_sspgst( itype, uplo, n, ap, bp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, n + ! Array Arguments + real(sp), intent(inout) :: ap(*) + real(sp), intent(in) :: bp(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk + real(sp) :: ajj, akk, bjj, bkk, ct + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SSPGST', -info ) + return + end if + if( itype==1 ) then + if( upper ) then + ! compute inv(u**t)*a*inv(u) + ! j1 and jj are the indices of a(1,j) and a(j,j) + jj = 0 + do j = 1, n + j1 = jj + 1 + jj = jj + j + ! compute the j-th column of the upper triangle of a + bjj = bp( jj ) + call stdlib_stpsv( uplo, 'TRANSPOSE', 'NONUNIT', j, bp,ap( j1 ), 1 ) + call stdlib_sspmv( uplo, j-1, -one, ap, bp( j1 ), 1, one,ap( j1 ), 1 ) + call stdlib_sscal( j-1, one / bjj, ap( j1 ), 1 ) + ap( jj ) = ( ap( jj )-stdlib_sdot( j-1, ap( j1 ), 1, bp( j1 ),1 ) ) / & + bjj + end do + else + ! compute inv(l)*a*inv(l**t) + ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1) + kk = 1 + do k = 1, n + k1k1 = kk + n - k + 1 + ! update the lower triangle of a(k:n,k:n) + akk = ap( kk ) + bkk = bp( kk ) + akk = akk / bkk**2 + ap( kk ) = akk + if( k SSPTRF: computes the factorization of a real symmetric matrix A stored + !> in packed format using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_ssptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(sp) :: absakk, alpha, colmax, d11, d12, d21, d22, r1, rowmax, t, wk, wkm1, & + wkp1 + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SSPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**t using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( ap( kc+k-1 ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_isamax( k-1, ap( kc ), 1 ) + colmax = abs( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( abs( ap( kx ) )>rowmax ) then + rowmax = abs( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_isamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, abs( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( ap( kpc+imax-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_sswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = ap( knc+j-1 ) + ap( knc+j-1 ) = ap( kx ) + ap( kx ) = t + end do + t = ap( knc+kk-1 ) + ap( knc+kk-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = t + if( kstep==2 ) then + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = one / ap( kc+k-1 ) + call stdlib_sspr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_sscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = ap( k-1+( k-1 )*k / 2 ) + d22 = ap( k-1+( k-2 )*( k-1 ) / 2 ) / d12 + d11 = ap( k+( k-1 )*k / 2 ) / d12 + t = one / ( d11*d22-one ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-ap( j+( k-1 )*k / 2 ) ) + + wk = d12*( d22*ap( j+( k-1 )*k / 2 )-ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *wk -ap( i+( k-2 )*( k-1 ) / 2 )*wkm1 + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( ap( kc ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( abs( ap( kx ) )>rowmax ) then + rowmax = abs( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( ap( kpc ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp SSPTRI: computes the inverse of a real symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by SSPTRF. + + pure subroutine stdlib_ssptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + real(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SSPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==zero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==zero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = one / ap( kc+k-1 ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_sspmv( uplo, k-1, -one, ap, work, 1, zero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_sdot( k-1, work, 1, ap( kc ), 1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( ap( kcnext+k-1 ) ) + ak = ap( kc+k-1 ) / t + akp1 = ap( kcnext+k ) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-one ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_sspmv( uplo, k-1, -one, ap, work, 1, zero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_sdot( k-1, work, 1, ap( kc ), 1 ) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_sdot( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_scopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_sspmv( uplo, k-1, -one, ap, work, 1, zero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -stdlib_sdot( k-1, work, 1, ap( kcnext ), 1 ) + + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_sswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = ap( kc+j-1 ) + ap( kc+j-1 ) = ap( kx ) + ap( kx ) = temp + end do + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = one / ap( kc ) + ! compute column k of the inverse. + if( k SSPTRS: solves a system of linear equations A*X = B with a real + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by SSPTRF. + + pure subroutine stdlib_ssptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + real(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_sger( k-1, nrhs, -one, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_sscal( nrhs, one / ap( kc+k-1 ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_sswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_sger( k-2, nrhs, -one, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_sger( k-2, nrhs, -one, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, 1 & + ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, ap( kc ),1, one, b( k, & + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, ap( kc ),1, one, b( k, & + 1 ), ldb ) + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb,ap( kc+k ), 1, one, b( k+& + 1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k SSTEBZ: computes the eigenvalues of a symmetric tridiagonal + !> matrix T. The user may ask for all eigenvalues, all eigenvalues + !> in the half-open interval (VL, VU], or the IL-th through IU-th + !> eigenvalues. + !> To avoid overflow, the matrix must be scaled so that its + !> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest + !> accuracy, it should not be much smaller than that. + !> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal + !> Matrix", Report CS41, Computer Science Dept., Stanford + !> University, July 21, 1966. + + pure subroutine stdlib_sstebz( range, order, n, vl, vu, il, iu, abstol, d, e,m, nsplit, w, & + iblock, isplit, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: order, range + integer(ilp), intent(in) :: il, iu, n + integer(ilp), intent(out) :: info, m, nsplit + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), isplit(*), iwork(*) + real(sp), intent(in) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: fudge = 2.1_sp + real(sp), parameter :: relfac = 2.0_sp + + + ! Local Scalars + logical(lk) :: ncnvrg, toofew + integer(ilp) :: ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iorder, & + iout, irange, itmax, itmp1, iw, iwoff, j, jb, jdisc, je, nb, nwl, nwu + real(sp) :: atoli, bnorm, gl, gu, pivmin, rtoli, safemn, tmp1, tmp2, tnorm, ulp, wkill,& + wl, wlu, wu, wul + ! Local Arrays + integer(ilp) :: idumma(1) + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + info = 0 + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = 1 + else if( stdlib_lsame( range, 'V' ) ) then + irange = 2 + else if( stdlib_lsame( range, 'I' ) ) then + irange = 3 + else + irange = 0 + end if + ! decode order + if( stdlib_lsame( order, 'B' ) ) then + iorder = 2 + else if( stdlib_lsame( order, 'E' ) ) then + iorder = 1 + else + iorder = 0 + end if + ! check for errors + if( irange<=0 ) then + info = -1 + else if( iorder<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( irange==2 ) then + if( vl>=vu ) info = -5 + else if( irange==3 .and. ( il<1 .or. il>max( 1, n ) ) )then + info = -6 + else if( irange==3 .and. ( iun ) )then + info = -7 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SSTEBZ', -info ) + return + end if + ! initialize error flags + info = 0 + ncnvrg = .false. + toofew = .false. + ! quick return if possible + m = 0 + if( n==0 )return + ! simplifications: + if( irange==3 .and. il==1 .and. iu==n )irange = 1 + ! get machine constants + ! nb is the minimum vector length for vector bisection, or 0 + ! if only scalar is to be done. + safemn = stdlib_slamch( 'S' ) + ulp = stdlib_slamch( 'P' ) + rtoli = ulp*relfac + nb = stdlib_ilaenv( 1, 'SSTEBZ', ' ', n, -1, -1, -1 ) + if( nb<=1 )nb = 0 + ! special case when n=1 + if( n==1 ) then + nsplit = 1 + isplit( 1 ) = 1 + if( irange==2 .and. ( vl>=d( 1 ) .or. vutmp1 ) then + isplit( nsplit ) = j - 1 + nsplit = nsplit + 1 + work( j-1 ) = zero + else + work( j-1 ) = tmp1 + pivmin = max( pivmin, tmp1 ) + end if + end do + isplit( nsplit ) = n + pivmin = pivmin*safemn + ! compute interval and atoli + if( irange==3 ) then + ! range='i': compute the interval containing eigenvalues + ! il through iu. + ! compute gershgorin interval for entire (split) matrix + ! and use it as the initial interval + gu = d( 1 ) + gl = d( 1 ) + tmp1 = zero + do j = 1, n - 1 + tmp2 = sqrt( work( j ) ) + gu = max( gu, d( j )+tmp1+tmp2 ) + gl = min( gl, d( j )-tmp1-tmp2 ) + tmp1 = tmp2 + end do + gu = max( gu, d( n )+tmp1 ) + gl = min( gl, d( n )-tmp1 ) + tnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*tnorm*ulp*n - fudge*two*pivmin + gu = gu + fudge*tnorm*ulp*n + fudge*pivmin + ! compute iteration parameters + itmax = int( ( log( tnorm+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + if( abstol<=zero ) then + atoli = ulp*tnorm + else + atoli = abstol + end if + work( n+1 ) = gl + work( n+2 ) = gl + work( n+3 ) = gu + work( n+4 ) = gu + work( n+5 ) = gl + work( n+6 ) = gu + iwork( 1 ) = -1 + iwork( 2 ) = -1 + iwork( 3 ) = n + 1 + iwork( 4 ) = n + 1 + iwork( 5 ) = il - 1 + iwork( 6 ) = iu + call stdlib_slaebz( 3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin, d, e,work, iwork( & + 5 ), work( n+1 ), work( n+5 ), iout,iwork, w, iblock, iinfo ) + if( iwork( 6 )==iu ) then + wl = work( n+1 ) + wlu = work( n+3 ) + nwl = iwork( 1 ) + wu = work( n+4 ) + wul = work( n+2 ) + nwu = iwork( 4 ) + else + wl = work( n+2 ) + wlu = work( n+4 ) + nwl = iwork( 2 ) + wu = work( n+3 ) + wul = work( n+1 ) + nwu = iwork( 3 ) + end if + if( nwl<0 .or. nwl>=n .or. nwu<1 .or. nwu>n ) then + info = 4 + return + end if + else + ! range='a' or 'v' -- set atoli + tnorm = max( abs( d( 1 ) )+abs( e( 1 ) ),abs( d( n ) )+abs( e( n-1 ) ) ) + do j = 2, n - 1 + tnorm = max( tnorm, abs( d( j ) )+abs( e( j-1 ) )+abs( e( j ) ) ) + end do + if( abstol<=zero ) then + atoli = ulp*tnorm + else + atoli = abstol + end if + if( irange==2 ) then + wl = vl + wu = vu + else + wl = zero + wu = zero + end if + end if + ! find eigenvalues -- loop over blocks and recompute nwl and nwu. + ! nwl accumulates the number of eigenvalues .le. wl, + ! nwu accumulates the number of eigenvalues .le. wu + m = 0 + iend = 0 + info = 0 + nwl = 0 + nwu = 0 + loop_70: do jb = 1, nsplit + ioff = iend + ibegin = ioff + 1 + iend = isplit( jb ) + in = iend - ioff + if( in==1 ) then + ! special case -- in=1 + if( irange==1 .or. wl>=d( ibegin )-pivmin )nwl = nwl + 1 + if( irange==1 .or. wu>=d( ibegin )-pivmin )nwu = nwu + 1 + if( irange==1 .or. ( wl=d( ibegin )-pivmin ) ) & + then + m = m + 1 + w( m ) = d( ibegin ) + iblock( m ) = jb + end if + else + ! general case -- in > 1 + ! compute gershgorin interval + ! and use it as the initial interval + gu = d( ibegin ) + gl = d( ibegin ) + tmp1 = zero + do j = ibegin, iend - 1 + tmp2 = abs( e( j ) ) + gu = max( gu, d( j )+tmp1+tmp2 ) + gl = min( gl, d( j )-tmp1-tmp2 ) + tmp1 = tmp2 + end do + gu = max( gu, d( iend )+tmp1 ) + gl = min( gl, d( iend )-tmp1 ) + bnorm = max( abs( gl ), abs( gu ) ) + gl = gl - fudge*bnorm*ulp*in - fudge*pivmin + gu = gu + fudge*bnorm*ulp*in + fudge*pivmin + ! compute atoli for the current submatrix + if( abstol<=zero ) then + atoli = ulp*max( abs( gl ), abs( gu ) ) + else + atoli = abstol + end if + if( irange>1 ) then + if( gu=gu )cycle loop_70 + end if + ! set up initial interval + work( n+1 ) = gl + work( n+in+1 ) = gu + call stdlib_slaebz( 1, 0, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e( & + ibegin ), work( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), im,iwork, w( m+1 & + ), iblock( m+1 ), iinfo ) + nwl = nwl + iwork( 1 ) + nwu = nwu + iwork( in+1 ) + iwoff = m - iwork( 1 ) + ! compute eigenvalues + itmax = int( ( log( gu-gl+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + & + 2 + call stdlib_slaebz( 2, itmax, in, in, 1, nb, atoli, rtoli, pivmin,d( ibegin ), e(& + ibegin ), work( ibegin ),idumma, work( n+1 ), work( n+2*in+1 ), iout,iwork, w( & + m+1 ), iblock( m+1 ), iinfo ) + ! copy eigenvalues into w and iblock + ! use -jb for block number for unconverged eigenvalues. + do j = 1, iout + tmp1 = half*( work( j+n )+work( j+in+n ) ) + ! flag non-convergence. + if( j>iout-iinfo ) then + ncnvrg = .true. + ib = -jb + else + ib = jb + end if + do je = iwork( j ) + 1 + iwoff,iwork( j+in ) + iwoff + w( je ) = tmp1 + iblock( je ) = ib + end do + end do + m = m + im + end if + end do loop_70 + ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu + ! if nwl+1 < il or nwu > iu, discard extra eigenvalues. + if( irange==3 ) then + im = 0 + idiscl = il - 1 - nwl + idiscu = nwu - iu + if( idiscl>0 .or. idiscu>0 ) then + do je = 1, m + if( w( je )<=wlu .and. idiscl>0 ) then + idiscl = idiscl - 1 + else if( w( je )>=wul .and. idiscu>0 ) then + idiscu = idiscu - 1 + else + im = im + 1 + w( im ) = w( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl>0 .or. idiscu>0 ) then + ! code to deal with effects of bad arithmetic: + ! some low eigenvalues to be discarded are not in (wl,wlu], + ! or high eigenvalues to be discarded are not in (wul,wu] + ! so just kill off the smallest idiscl/largest idiscu + ! eigenvalues, by simply finding the smallest/largest + ! eigenvalue(s). + ! (if n(w) is monotone non-decreasing, this should never + ! happen.) + if( idiscl>0 ) then + wkill = wu + do jdisc = 1, idiscl + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )0 ) then + wkill = wl + do jdisc = 1, idiscu + iw = 0 + do je = 1, m + if( iblock( je )/=0 .and.( w( je )>wkill .or. iw==0 ) ) then + iw = je + wkill = w( je ) + end if + end do + iblock( iw ) = 0 + end do + end if + im = 0 + do je = 1, m + if( iblock( je )/=0 ) then + im = im + 1 + w( im ) = w( je ) + iblock( im ) = iblock( je ) + end if + end do + m = im + end if + if( idiscl<0 .or. idiscu<0 ) then + toofew = .true. + end if + end if + ! if order='b', do nothing -- the eigenvalues are already sorted + ! by block. + ! if order='e', sort the eigenvalues from smallest to largest + if( iorder==1 .and. nsplit>1 ) then + do je = 1, m - 1 + ie = 0 + tmp1 = w( je ) + do j = je + 1, m + if( w( j ) SSYCONV: convert A given by TRF into L and D and vice-versa. + !> Get Non-diag elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_ssyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + real(sp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=zero + a(i-1,i)=zero + i=i-1 + else + e(i)=zero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=zero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_ssyconv + + !> If parameter WAY = 'C': + !> SSYCONVF: converts the factorization output format used in + !> SSYTRF provided on entry in parameter A into the factorization + !> output format used in SSYTRF_RK (or SSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in SSYTRF into + !> the format used in SSYTRF_RK (or SSYTRF_BK). + !> If parameter WAY = 'R': + !> SSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in SSYTRF_RK + !> (or SSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in SSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in SSYTRF_RK + !> (or SSYTRF_BK) into the format used in SSYTRF. + + pure subroutine stdlib_ssyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_sswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_sswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_sswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_sswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is one interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_ssyconvf + + !> If parameter WAY = 'C': + !> SSYCONVF_ROOK: converts the factorization output format used in + !> SSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in SSYTRF_RK (or SSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for SSYTRF_ROOK and + !> SSYTRF_RK (or SSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> SSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in SSYTRF_RK + !> (or SSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in SSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for SSYTRF_ROOK and + !> SSYTRF_RK (or SSYTRF_BK) is the same and is not converted. + + pure subroutine stdlib_ssyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = zero + a( i-1, i ) = zero + i = i - 1 + else + e( i ) = zero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and zero out + ! corresponding entries in input storage a + i = 1 + e( n ) = zero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_sswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_sswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_sswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_sswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_sswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_sswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_ssyconvf_rook + + !> SSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_ssyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(sp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'SSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), abs( a( j, j ) ) ) + amax = max( amax, abs( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), abs( a( i, j ) ) ) + s( j ) = max( s( j ), abs( a( i, j ) ) ) + amax = max( amax, abs( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_sp / s( j ) + end do + tol = one / sqrt( 2.0_sp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + abs( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + abs( a( i, j ) ) * s( j ) + work( j ) = work( j ) + abs( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + s( i )*work( i ) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_slassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = abs( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( work( i ) - t*si ) + c0 = -(t*si)*si + 2*work( i )*si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = abs( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = abs( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + work( i ) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_slamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_slamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_ssyequb + + !> SSYGS2: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. + !> B must have been previously factorized as U**T *U or L*L**T by SPOTRF. + + pure subroutine stdlib_ssygs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(sp) :: akk, bkk, ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda SSYGST: reduces a real symmetric-definite generalized eigenproblem + !> to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. + !> B must have been previously factorized as U**T*U or L*L**T by SPOTRF. + + pure subroutine stdlib_ssygst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_ssygs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**t)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_strsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, & + one, b( k, k ), ldb,a( k, k+kb ), lda ) + call stdlib_ssymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_ssyr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), & + lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda ) + call stdlib_ssymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( & + k, k+kb ), ldb, one,a( k, k+kb ), lda ) + call stdlib_strsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**t) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_strsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, & + one, b( k, k ), ldb,a( k+kb, k ), lda ) + call stdlib_ssymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_ssyr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k & + ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda ) + call stdlib_ssymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(& + k+kb, k ), ldb, one,a( k+kb, k ), lda ) + call stdlib_strsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**t + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_strmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, & + b, ldb, a( 1, k ), lda ) + call stdlib_ssymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_ssyr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_ssymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1, k ), & + ldb, one, a( 1, k ), lda ) + call stdlib_strmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( & + k, k ), ldb, a( 1, k ),lda ) + call stdlib_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**t*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_strmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, & + b, ldb, a( k, 1 ), lda ) + call stdlib_ssymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_ssyr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1 ), lda, b( k, & + 1 ), ldb, one, a,lda ) + call stdlib_ssymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1 ), & + ldb, one, a( k, 1 ), lda ) + call stdlib_strmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( & + k, k ), ldb, a( k, 1 ), lda ) + call stdlib_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_ssygst + + !> SSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_ssyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(sp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_sswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_sswap( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_ssyswapr + + !> SSYTF2_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_ssytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, stemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_isamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = zero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_isamax( imax-1, a( 1, imax ), 1 ) + stemp = abs( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_sswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_sswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_sswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_sswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_ssyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_sscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_ssyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = zero + a( k-1, k ) = zero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = zero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_sswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_sswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_sswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_sswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_ssyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_sscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_ssyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = zero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k SSYTF2_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_ssytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sevten = 17.0e+0_sp + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(sp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, stemp, t, wk, wkm1, & + wkp1, sfmin + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_isamax( k-1, a( 1, k ), 1 ) + colmax = abs( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_isamax( imax-1, a( 1, imax ), 1 ) + stemp = abs( a( itemp, imax ) ) + if( stemp>rowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )1 )call stdlib_sswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_sswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_sswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_sswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / a( k, k ) + call stdlib_ssyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_sscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_ssyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = stemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( abs( a( imax, imax ) )(k+1) )call stdlib_sswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_sswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / a( k, k ) + call stdlib_ssyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_sscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_ssyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k SSYTRF_RK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_ssytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_slasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_ssytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_slasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_ssytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_sswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_ssytrf_rk + + !> SSYTRF_ROOK: computes the factorization of a real symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_ssytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_slasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_ssytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_slasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_slasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_ssytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_ssytrf_rook + + !> SSYTRI: computes the inverse of a real symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> SSYTRF. + + pure subroutine stdlib_ssytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_sdot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_sdot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_sdot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_scopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_sdot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_sswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_sswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k SSYTRI_ROOK: computes the inverse of a real symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by SSYTRF_ROOK. + + pure subroutine stdlib_ssytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + real(sp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==zero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_sdot( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_scopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_sdot( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_sdot( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_scopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_ssymv( uplo, k-1, -one, a, lda, work, 1, zero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_sdot( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_sswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_sswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_sswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_sswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_sswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_sswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / a( k, k ) + ! compute column k of the inverse. + if( k SSYTRS: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by SSYTRF. + + pure subroutine stdlib_ssytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_sger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_sscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_sswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_sger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_sger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb, a( 1, k ),1, one, b( k, & + 1 ), ldb ) + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b, ldb,a( 1, k+1 ), 1, one, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k SSYTRS2: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by SSYTRF and converted by SSYCONV. + + pure subroutine stdlib_ssytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_sswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_strsm('L','U','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_sscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_strsm('L','U','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_sswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_strsm('L','L','N','U',n,nrhs,one,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_sscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_strsm('L','L','T','U',n,nrhs,one,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_ssyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_ssytrs2 + + !> SSYTRS_3: solves a system of linear equations A * X = B with a real + !> symmetric matrix A using the factorization computed + !> by SSYTRF_RK or SSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_ssytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*), e(*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_strsm( 'L', 'U', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_sscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_strsm( 'L', 'U', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_strsm( 'L', 'L', 'N', 'U', n, nrhs, one, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_sscal( nrhs, one / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_strsm('L', 'L', 'T', 'U', n, nrhs, one, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_ssytrs_3 + + !> SSYTRS_AA: solves a system of linear equations A*X = B with a real + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by SSYTRF_AA. + + pure subroutine stdlib_ssytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + k = 1 + do while ( k<=n ) + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_strsm( 'L', 'U', 'T', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b( 2, 1 ), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_slacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_slacpy( 'F', 1, n-1, a(1, 2), lda+1, work(1), 1) + call stdlib_slacpy( 'F', 1, n-1, a(1, 2), lda+1, work(2*n), 1) + end if + call stdlib_sgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_strsm( 'L', 'U', 'N', 'U', n-1, nrhs, one, a( 1, 2 ),lda, b(2, 1), & + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**t \p**t * b) )) ] + k = n + do while ( k>=1 ) + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k - 1 + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + k = 1 + do while ( k<=n ) + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_strsm( 'L', 'L', 'N', 'U', n-1, nrhs, one, a( 2, 1),lda, b(2, 1), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_slacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_slacpy( 'F', 1, n-1, a(2, 1), lda+1, work(1), 1) + call stdlib_slacpy( 'F', 1, n-1, a(2, 1), lda+1, work(2*n), 1) + end if + call stdlib_sgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute l**t \ b -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_strsm( 'L', 'L', 'T', 'U', n-1, nrhs, one, a( 2, 1 ),lda, b( 2, 1 ), & + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + k = n + do while ( k>=1 ) + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k - 1 + end do + end if + end if + return + end subroutine stdlib_ssytrs_aa + + !> SSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a real symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by SSYTRF_ROOK. + + pure subroutine stdlib_ssytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(sp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_sger( k-1, nrhs, -one, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_sscal( nrhs, one / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_sswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_sger( k-2, nrhs, -one, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_sger( k-2, nrhs, -one, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ),& + ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - one + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, & + one, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k ), 1, one, b( & + k, 1 ), ldb ) + call stdlib_sgemv( 'TRANSPOSE', k-1, nrhs, -one, b,ldb, a( 1, k+1 ), 1, one, & + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_sswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_sswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k STBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by STBTRS or some other + !> means before entering this routine. STBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_stbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_stbsv( uplo, transt, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_stbsv( uplo, trans, diag, n, kd, ab, ldab,work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_stbrfs + + !> STBTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_stbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> STFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**T. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_stfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + real(sp), intent(in) :: alpha + ! Array Arguments + real(sp), intent(in) :: a(0:*) + real(sp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb STFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_stfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(out) :: ap(0:*) + real(sp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'STFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = arf( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_stfttp + + !> STFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_stfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(sp), intent(out) :: a(0:lda-1,0:*) + real(sp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda STPRFB: applies a real "triangular-pentagonal" block reflector H or its + !> conjugate transpose H^H to a real matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_stprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(in) :: t(ldt,*), v(ldv,*) + real(sp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h^h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w^h or h^h = i - w t^h w^h + ! a = a - t (a + v^h b) or a = a - t^h (a + v^h b) + ! b = b - v t (a + v^h b) or b = b - v t^h (a + v^h b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_strmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_sgemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork ) + + call stdlib_sgemm( 'T', 'N', k-l, n, m, one, v( 1, kp ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_sgemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_strmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h^h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w^h or h^h = i - w t^h w^h + ! a = a - (a + b v) t or a = a - (a + b v) t^h + ! b = b - (a + b v) t v^h or b = b - (a + b v) t^h v^h + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_strmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_sgemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork ) + + call stdlib_sgemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1, kp ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_sgemm( 'N', 'T', m, l, k-l, -one, work( 1, kp ), ldwork,v( np, kp ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_strmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h^h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w^h or h^h = i - w t^h w^h + ! a = a - t (a + v^h b) or a = a - t^h (a + v^h b) + ! b = b - v t (a + v^h b) or b = b - v t^h (a + v^h b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_strmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_sgemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_sgemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'N', m-l, n, k, -one, v( mp, 1 ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_sgemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_strmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h^h where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w^h or h^h = i - w t^h w^h + ! a = a - (a + b v) t or a = a - (a + b v) t^h + ! b = b - (a + b v) t v^h or b = b - (a + b v) t^h v^h + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_strmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_sgemm( 'N', 'N', m, l, n-l, one, b( 1, np ), ldb,v( np, kp ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_sgemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1 ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_sgemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_strmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h^h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w^h t w or h^h = i - w^h t^h w + ! a = a - t (a + v b) or a = a - t^h (a + v b) + ! b = b - v^h t (a + v b) or b = b - v^h t^h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_strmm( 'L', 'L', 'N', 'N', l, n, one, v( 1, mp ), ldv,work, ldb ) + + call stdlib_sgemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork ) + + call stdlib_sgemm( 'N', 'N', k-l, n, m, one, v( kp, 1 ), ldv,b, ldb, zero, work( kp,& + 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb ) + + call stdlib_sgemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, one, b( mp, 1 ), ldb ) + call stdlib_strmm( 'L', 'L', 'T', 'N', l, n, one, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h^h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w^h t w or h^h = i - w^h t^h w + ! a = a - (a + b v^h) t or a = a - (a + b v^h) t^h + ! b = b - (a + b v^h) t v or b = b - (a + b v^h) t^h v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_strmm( 'R', 'L', 'T', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + call stdlib_sgemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork ) + + call stdlib_sgemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1 ), ldv, zero, work( 1, & + kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_sgemm( 'N', 'N', m, l, k-l, -one, work( 1, kp ), ldwork,v( kp, np ), & + ldv, one, b( 1, np ), ldb ) + call stdlib_strmm( 'R', 'L', 'N', 'N', m, l, one, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h^h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w^h t w or h^h = i - w^h t^h w + ! a = a - t (a + v b) or a = a - t^h (a + v b) + ! b = b - v^h t (a + v b) or b = b - v^h t^h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_strmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_sgemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1 ), ldb, one, & + work( kp, 1 ), ldwork ) + call stdlib_sgemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'T', 'N', m-l, n, k, -one, v( 1, mp ), ldv,work, ldwork, one, b( & + mp, 1 ), ldb ) + call stdlib_sgemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb ) + + call stdlib_strmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h^h where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w^h t w or h^h = i - w^h t^h w + ! a = a - (a + b v^h) t or a = a - (a + b v^h) t^h + ! b = b - (a + b v^h) t v or b = b - (a + b v^h) t^h v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_strmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_sgemm( 'N', 'T', m, l, n-l, one, b( 1, np ), ldb,v( kp, np ), ldv, one, & + work( 1, kp ), ldwork ) + call stdlib_sgemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_strmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_sgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1, np ), ldv, one, b( & + 1, np ), ldb ) + call stdlib_sgemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb ) + + call stdlib_strmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_stprfb + + !> STPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by STPTRS or some other + !> means before entering this routine. STPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_stprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, kc, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_stpsv( uplo, transt, diag, n, ap, work( n+1 ), 1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_stpsv( uplo, trans, diag, n, ap, work( n+1 ), 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_stprfs + + !> STPTRI: computes the inverse of a real upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_stptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + real(sp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'STPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==zero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==zero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = one / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -one + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_stpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_sscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = one / ap( jc ) + ajj = -ap( jc ) + else + ajj = -one + end if + if( j STPTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_stptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb STPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_stpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: ap(0:*) + real(sp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'STPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = ap( 0 ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! n is odd, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is odd, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! n is odd, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is odd, transr = 't', and uplo = 'u' + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! n is even, transr = 'n', and uplo = 'l' + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + else + ! n is even, transr = 'n', and uplo = 'u' + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 't' + if( lower ) then + ! n is even, transr = 't', and uplo = 'l' + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! n is even, transr = 't', and uplo = 'u' + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_stpttf + + !> STPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_stpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(sp), intent(out) :: a(lda,*) + real(sp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda STRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by STRTRS or some other + !> means before entering this routine. STRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_strrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transt + integer(ilp) :: i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_strsv( uplo, transt, diag, n, a, lda, work( n+1 ),1 ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_strsv( uplo, trans, diag, n, a, lda, work( n+1 ),1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_strrfs + + !> STRTI2: computes the inverse of a real upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_strti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + real(sp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda STRTRI: computes the inverse of a real upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_strtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_strti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_strmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, one, a, lda,& + a( 1, j ), lda ) + call stdlib_strsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -one, a( j,& + j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_strti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_strmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, one,& + a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_strsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + one, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_strti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_strtri + + !> STRTRS: solves a triangular system of the form + !> A * X = B or A**T * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_strtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda STRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_strttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(sp), intent(in) :: a(0:lda-1,0:*) + real(sp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda STRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_strttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SBBCSD: computes the CS decomposition of an orthogonal matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See SORCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The orthogonal matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_sbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q + ! Array Arguments + real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), work(*) + real(sp), intent(inout) :: phi(*), theta(*) + real(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(sp), parameter :: hundred = 100.0_sp + real(sp), parameter :: meighth = -0.125_sp + real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini + real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lworkmin = 1 + work(1) = lworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lworkopt = iv2tsn + q - 1 + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_slamch( 'EPSILON' ) + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_sp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_slartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + else + call stdlib_slartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& + imin-1) ) + end if + temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) + b11d(imin) = temp + b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) + temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) + b21d(imin) = temp + b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_slartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& + r ) + else if( mu <= nu ) then + call stdlib_slartgs( b11e( imin ), b11d( imin + 1 ), mu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + else + call stdlib_slartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_slartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& + r ) + else if( nu < mu ) then + call stdlib_slartgs( b21e( imin ), b21d( imin + 1 ), nu,work(iu2cs+imin-1), work(& + iu2sn+imin-1) ) + else + call stdlib_slartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& + imin-1) ) + end if + work(iu2cs+imin-1) = -work(iu2cs+imin-1) + work(iu2sn+imin-1) = -work(iu2sn+imin-1) + temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = work(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) + end if + temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) + b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = work(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) + temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = work(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) + end if + temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) + b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = work(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_slartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_slartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_slartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_slartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + else + call stdlib_slartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) + + end if + work(iv1tcs+i-1) = -work(iv1tcs+i-1) + work(iv1tsn+i-1) = -work(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_slartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& + 1), r ) + else if( nu < mu ) then + call stdlib_slartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + else + call stdlib_slartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& + 1-1) ) + end if + temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) + b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = work(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) + temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) + b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = work(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) + temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) + b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = work(iv2tsn+i-1-1)*b12e(i) + b12e(i) = work(iv2tcs+i-1-1)*b12e(i) + temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) + b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = work(iv2tsn+i-1-1)*b22e(i) + b22e(i) = work(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_slartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_slartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_slartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) + + else if( mu <= nu ) then + call stdlib_slartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + else + call stdlib_slartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_slartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) + + else if( nu < mu ) then + call stdlib_slartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + else + call stdlib_slartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) + + end if + work(iu2cs+i-1) = -work(iu2cs+i-1) + work(iu2sn+i-1) = -work(iu2sn+i-1) + temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = work(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) + end if + temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = work(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) + end if + temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) + b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = work(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) + temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) + b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = work(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_slartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_slartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_slartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_slartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + else + call stdlib_slartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& + iv2tsn+imax-1-1) ) + end if + temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) + b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) + b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_slasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_slasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_slasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_slasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_slasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_slasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_slasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_slasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& + iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_sscal( q, negone, v1t(imax,1), ldv1t ) + else + call stdlib_sscal( q, negone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_sscal( p, negone, u1(1,imax), 1 ) + else + call stdlib_sscal( p, negone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_sscal( m-p, negone, u2(1,imax), 1 ) + else + call stdlib_sscal( m-p, negone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_sscal( m-q, negone, v2t(imax,1), ldv2t ) + else + call stdlib_sscal( m-q, negone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_sswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_sswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_sswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_sswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_sswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_sswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_sswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_sswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_sbbcsd + + !> SDISNA: computes the reciprocal condition numbers for the eigenvectors + !> of a real symmetric or complex Hermitian matrix or for the left or + !> right singular vectors of a general m-by-n matrix. The reciprocal + !> condition number is the 'gap' between the corresponding eigenvalue or + !> singular value and the nearest other one. + !> The bound on the error, measured by angle in radians, in the I-th + !> computed vector is given by + !> SLAMCH( 'E' ) * ( ANORM / SEP( I ) ) + !> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed + !> to be smaller than SLAMCH( 'E' )*ANORM in order to limit the size of + !> the error bound. + !> SDISNA may also be used to compute error bounds for eigenvectors of + !> the generalized symmetric definite eigenproblem. + + pure subroutine stdlib_sdisna( job, m, n, d, sep, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: m, n + ! Array Arguments + real(sp), intent(in) :: d(*) + real(sp), intent(out) :: sep(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: decr, eigen, incr, left, right, sing + integer(ilp) :: i, k + real(sp) :: anorm, eps, newgap, oldgap, safmin, thresh + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + eigen = stdlib_lsame( job, 'E' ) + left = stdlib_lsame( job, 'L' ) + right = stdlib_lsame( job, 'R' ) + sing = left .or. right + if( eigen ) then + k = m + else if( sing ) then + k = min( m, n ) + end if + if( .not.eigen .and. .not.sing ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( k<0 ) then + info = -3 + else + incr = .true. + decr = .true. + do i = 1, k - 1 + if( incr )incr = incr .and. d( i )<=d( i+1 ) + if( decr )decr = decr .and. d( i )>=d( i+1 ) + end do + if( sing .and. k>0 ) then + if( incr )incr = incr .and. zero<=d( 1 ) + if( decr )decr = decr .and. d( k )>=zero + end if + if( .not.( incr .or. decr ) )info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SDISNA', -info ) + return + end if + ! quick return if possible + if( k==0 )return + ! compute reciprocal condition numbers + if( k==1 ) then + sep( 1 ) = stdlib_slamch( 'O' ) + else + oldgap = abs( d( 2 )-d( 1 ) ) + sep( 1 ) = oldgap + do i = 2, k - 1 + newgap = abs( d( i+1 )-d( i ) ) + sep( i ) = min( oldgap, newgap ) + oldgap = newgap + end do + sep( k ) = oldgap + end if + if( sing ) then + if( ( left .and. m>n ) .or. ( right .and. m SGBBRD: reduces a real general m-by-n band matrix A to upper + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> The routine computes B, and optionally forms Q or P**T, or computes + !> Q**T*C for a given matrix C. + + pure subroutine stdlib_sgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*), c(ldc,*) + real(sp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& + mu, mu0, nr, nrt + real(sp) :: ra, rb, rc, rs + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the sines of the plane rotations are stored in work(1:max(m,n)) + ! and the cosines in work(max(m,n)+1:2*max(m,n)). + mn = max( m, n ) + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_slargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + work( mn+j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_slartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_slartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & + work( i+ml-1 ),ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = work( mn+j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_slargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + work( mn+j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_slartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_slartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & + mn+i+mu-1 ), work( i+mu-1 ),ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_srot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,work( mn+i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**t + do j = j1, j2, kb1 + call stdlib_srot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, work( & + mn+j+kun ),work( j+kun ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, storing diagonal elements in d + ! and off-diagonal elements in e + do i = 1, min( m-1, n ) + call stdlib_slartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + d( i ) = ra + if( i0 ) then + ! a has been reduced to upper bidiagonal form + if( m1 ) then + rb = -rs*ab( ku, i ) + e( i-1 ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_srot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, rs ) + + end do + else + ! copy off-diagonal elements to e and diagonal elements to d + do i = 1, minmn - 1 + e( i ) = ab( ku, i+1 ) + end do + do i = 1, minmn + d( i ) = ab( ku+1, i ) + end do + end if + else + ! a is diagonal. set elements of e to zero and copy diagonal + ! elements to d. + do i = 1, minmn - 1 + e( i ) = zero + end do + do i = 1, minmn + d( i ) = ab( 1, i ) + end do + end if + return + end subroutine stdlib_sgbbrd + + !> SGBCON: estimates the reciprocal of the condition number of a real + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by SGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_sgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(sp) :: ainvnm, scale, smlnum, t + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,min + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_slacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_saxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_slatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, work( 2*n+1 ),info ) + else + ! multiply by inv(u**t). + call stdlib_slatbs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, ldab, & + work, scale, work( 2*n+1 ),info ) + ! multiply by inv(l**t). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_sdot( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_isamax( n, work, 1 ) + if( scale SGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_sgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab SGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from SGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_sgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), abs( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_sgbequb + + !> SGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_sgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_sgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, info ) + + call stdlib_saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_sgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( n+i )*work( i ) + end do + call stdlib_sgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv,work( n+1 ), n, & + info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_sgbrfs + + !> SGBTRF: computes an LU factorization of a real m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_sgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + real(sp) :: temp + ! Local Arrays + real(sp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_sgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! zero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = zero + end do + end do + ! zero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = zero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to zero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = zero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to zero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = zero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_isamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=zero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_sger( km, jm-jj, -one, ab( kv+2, jj ), 1,ab( kv, jj+& + 1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is zero, set info to the index of the pivot + ! unless a zero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_slaswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_slaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_strsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, one, ab(& + kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -one, ab( & + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+1, j+jb ), & + ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -one, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, one,ab( kv+kl+1-jb, j+jb ), & + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_strsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, one, ab(& + kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -one, ab( & + kv+1+jb, j ), ldab-1,work13, ldwork, one, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -one, & + work31, ldwork, work13,ldwork, one, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_scopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_sgbtrf + + !> SGECON: estimates the reciprocal of the condition number of a general + !> real matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by SGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_sgecon( norm, n, a, lda, anorm, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, scale, sl, smlnum, su + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_sgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, rcmax, rcmin, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from SGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_sgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(sp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: c(*), r(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + ! Intrinsic Functions + intrinsic :: abs,max,min,log + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), abs( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_sgeequb + + !> DGEMLQT overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by SGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_sgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + real(sp), intent(in) :: v(ldv,*), t(ldt,*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv SGEMQRT: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'T': Q**T C C Q**T + !> where Q is a real orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**T + !> generated using the compact WY representation as returned by SGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_sgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + real(sp), intent(in) :: v(ldv,*), t(ldt,*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv SGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by SGETC2. + + pure subroutine stdlib_sgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: bignum, eps, smlnum, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! set constant to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_slaswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_isamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = ( one / two ) / abs( rhs( i ) ) + call stdlib_sscal( n, temp, rhs( 1 ), 1 ) + scale = scale*temp + end if + do i = n, 1, -1 + temp = one / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_slaswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_sgesc2 + + !> SGETC2: computes an LU factorization with complete pivoting of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is the Level 2 BLAS algorithm. + + pure subroutine stdlib_sgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(sp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_sswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_sswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) SGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_sgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_sscal( m-j, one / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j SGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_sgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: sfmin, temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_sscal( m-1, one / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_sgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_slaswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_strsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, one, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_sgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_slaswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_sgetrf2 + + !> SGETRI: computes the inverse of a matrix using the LU factorization + !> computed by SGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_sgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'SGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_strtri, then u is singular, + ! and the inverse is not computed. + call stdlib_strtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = zero + end do + ! compute current column of inv(a). + if( j SGETRS: solves a system of linear equations + !> A * X = B or A**T * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by SGETRF. + + pure subroutine stdlib_sgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda SGGBAL: balances a pair of general real matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_sggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: lscale(*), rscale(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sclfac = 1.0e+1_sp + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(sp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + ! Intrinsic Functions + intrinsic :: abs,int,log10,max,min,real,sign + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax SGGHRD: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then SGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_sgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(sp) :: c, s, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi SGTTRS: solves one of the systems of equations + !> A*X = B or A**T*X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by SGTTRF. + + pure subroutine stdlib_sgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_sgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_sgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_sgttrs + + !> SISNAN: returns .TRUE. if its argument is NaN, and .FALSE. + !> otherwise. To be replaced by the Fortran 2003 intrinsic in the + !> future. + + pure logical(lk) function stdlib_sisnan( sin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: sin + ! ===================================================================== + ! Executable Statements + stdlib_sisnan = stdlib_slaisnan(sin,sin) + return + end function stdlib_sisnan + + !> SLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_sla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = abs( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_sla_gbamv + + !> SLA_GBRCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(sp) function stdlib_sla_gbrcond( trans, n, kl, ku, ab, ldab, afb, ldafb,ipiv, cmode, c, & + info, work, iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, ldab, ldafb, kl, ku, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: ab(ldab,*), afb(ldafb,*), c(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j, kd, ke + real(sp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_sla_gbrcond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab SLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_sla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n, trans + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' )) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + jx = kx + do j = 1, lenx + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_sla_geamv + + !> SLA_GERCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(sp) function stdlib_sla_gercond( trans, n, a, lda, af, ldaf, ipiv,cmode, c, info, work, & + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, tmp + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_sla_gercond = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame(trans, 'T').and. .not. stdlib_lsame(trans, & + 'C') ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SLA_LIN_BERR: computes componentwise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the componentwise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_sla_lin_berr( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(sp), intent(in) :: ayb(n,nrhs) + real(sp), intent(out) :: berr(nrhs) + real(sp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(sp) :: tmp,safe1 + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the sla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= 0.0_sp) then + tmp = (safe1+abs(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_sp (and if computed by sla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_sla_lin_berr + + !> SLA_PORCOND: Estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(sp) function stdlib_sla_porcond( uplo, n, a, lda, af, ldaf, cmode, c,info, work, iwork ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + real(sp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(sp), intent(out) :: work(*) + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, tmp + logical(lk) :: up + ! Array Arguments + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_sla_porcond = zero + info = 0 + if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLA_PORCOND', -info ) + return + end if + if( n==0 ) then + stdlib_sla_porcond = one + return + end if + up = .false. + if ( stdlib_lsame( uplo, 'U' ) ) up = .true. + ! compute the equilibration matrix r such that + ! inv(r)*a*c has unit 1-norm. + if ( up ) then + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( j, i ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( j ,i ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + else + do i = 1, n + tmp = zero + if ( cmode == 1 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) * c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) * c( j ) ) + end do + else if ( cmode == 0 ) then + do j = 1, i + tmp = tmp + abs( a( i, j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) ) + end do + else + do j = 1, i + tmp = tmp + abs( a( i, j ) / c( j ) ) + end do + do j = i+1, n + tmp = tmp + abs( a( j, i ) / c( j ) ) + end do + end if + work( 2*n+i ) = tmp + end do + endif + ! estimate the norm of inv(op(a)). + ainvnm = zero + kase = 0 + 10 continue + call stdlib_slacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==2 ) then + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + if (up) then + call stdlib_spotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_spotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by inv(c). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + else + ! multiply by inv(c**t). + if ( cmode == 1 ) then + do i = 1, n + work( i ) = work( i ) / c( i ) + end do + else if ( cmode == -1 ) then + do i = 1, n + work( i ) = work( i ) * c( i ) + end do + end if + if ( up ) then + call stdlib_spotrs( 'UPPER', n, 1, af, ldaf, work, n, info ) + else + call stdlib_spotrs( 'LOWER', n, 1, af, ldaf, work, n, info ) + endif + ! multiply by r. + do i = 1, n + work( i ) = work( i ) * work( 2*n+i ) + end do + end if + go to 10 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm /= 0.0_sp )stdlib_sla_porcond = ( 1.0_sp / ainvnm ) + return + end function stdlib_sla_porcond + + !> SLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_sla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + real(sp), intent(in) :: a(lda,*), x(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(sp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + ! Intrinsic Functions + intrinsic :: max,abs,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) ) then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_slamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = abs( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = abs( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*abs( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_sla_syamv + + !> SLA_SYRCOND: estimates the Skeel condition number of op(A) * op2(C) + !> where op2 is determined by CMODE as follows + !> CMODE = 1 op2(C) = C + !> CMODE = 0 op2(C) = I + !> CMODE = -1 op2(C) = inv(C) + !> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) + !> is computed by computing scaling factors R such that + !> diag(R)*A*op2(C) is row equilibrated and computing the standard + !> infinity-norm condition number. + + real(sp) function stdlib_sla_syrcond( uplo, n, a, lda, af, ldaf, ipiv, cmode,c, info, work, & + iwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, ldaf, cmode + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*), af(ldaf,*), c(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + character :: normin + integer(ilp) :: kase, i, j + real(sp) :: ainvnm, smlnum, tmp + logical(lk) :: up + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + stdlib_sla_syrcond = zero + info = 0 + if( n<0 ) then + info = -2 + else if( lda SLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(sp) function stdlib_sla_syrpvgrw( uplo, n, info, a, lda, af, ldaf, ipiv,work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(sp), intent(in) :: a(lda,*), af(ldaf,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(sp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( abs( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( abs( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_ssytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k-1 ) = max( abs( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( abs( af( i, k ) ), work( k ) ) + work( k+1 ) = max( abs( af(i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( abs( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_sla_syrpvgrw = rpvgrw + end function stdlib_sla_syrpvgrw + + + pure subroutine stdlib_sladiv1( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(inout) :: a + real(sp), intent(in) :: b, c, d + real(sp), intent(out) :: p, q + ! ===================================================================== + + ! Local Scalars + real(sp) :: r, t + ! Executable Statements + r = d / c + t = one / (c + d * r) + p = stdlib_sladiv2(a, b, c, d, r, t) + a = -a + q = stdlib_sladiv2(b, a, c, d, r, t) + return + end subroutine stdlib_sladiv1 + + !> SLAED6: computes the positive or negative root (closest to the origin) + !> of + !> z(1) z(2) z(3) + !> f(x) = rho + --------- + ---------- + --------- + !> d(1)-x d(2)-x d(3)-x + !> It is assumed that + !> if ORGATI = .true. the root is between d(2) and d(3); + !> otherwise it is between d(1) and d(2) + !> This routine will be called by SLAED4 when necessary. In most cases, + !> the root sought is the smallest in magnitude, though it might not be + !> in some extremely rare situations. + + pure subroutine stdlib_slaed6( kniter, orgati, rho, d, z, finit, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: orgati + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kniter + real(sp), intent(in) :: finit, rho + real(sp), intent(out) :: tau + ! Array Arguments + real(sp), intent(in) :: d(3), z(3) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + + + ! Local Arrays + real(sp) :: dscale(3), zscale(3) + ! Local Scalars + logical(lk) :: scale + integer(ilp) :: i, iter, niter + real(sp) :: a, b, base, c, ddf, df, eps, erretm, eta, f, fc, sclfac, sclinv, small1, & + small2, sminv1, sminv2, temp, temp1, temp2, temp3, temp4, lbd, ubd + ! Intrinsic Functions + intrinsic :: abs,int,log,max,min,sqrt + ! Executable Statements + info = 0 + if( orgati ) then + lbd = d(2) + ubd = d(3) + else + lbd = d(1) + ubd = d(2) + end if + if( finit < zero )then + lbd = zero + else + ubd = zero + end if + niter = 1 + tau = zero + if( kniter==2 ) then + if( orgati ) then + temp = ( d( 3 )-d( 2 ) ) / two + c = rho + z( 1 ) / ( ( d( 1 )-d( 2 ) )-temp ) + a = c*( d( 2 )+d( 3 ) ) + z( 2 ) + z( 3 ) + b = c*d( 2 )*d( 3 ) + z( 2 )*d( 3 ) + z( 3 )*d( 2 ) + else + temp = ( d( 1 )-d( 2 ) ) / two + c = rho + z( 3 ) / ( ( d( 3 )-d( 2 ) )-temp ) + a = c*( d( 1 )+d( 2 ) ) + z( 1 ) + z( 2 ) + b = c*d( 1 )*d( 2 ) + z( 1 )*d( 2 ) + z( 2 )*d( 1 ) + end if + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + tau = b / a + else if( a<=zero ) then + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( tau < lbd .or. tau > ubd )tau = ( lbd+ubd )/two + if( d(1)==tau .or. d(2)==tau .or. d(3)==tau ) then + tau = zero + else + temp = finit + tau*z(1)/( d(1)*( d( 1 )-tau ) ) +tau*z(2)/( d(2)*( d( 2 )-tau ) )& + +tau*z(3)/( d(3)*( d( 3 )-tau ) ) + if( temp <= zero )then + lbd = tau + else + ubd = tau + end if + if( abs( finit )<=abs( temp ) )tau = zero + end if + end if + ! get machine parameters for possible scaling to avoid overflow + ! modified by sven: parameters small1, sminv1, small2, + ! sminv2, eps are not saved anymore between one call to the + ! others but recomputed at each call + eps = stdlib_slamch( 'EPSILON' ) + base = stdlib_slamch( 'BASE' ) + small1 = base**( int( log( stdlib_slamch( 'SAFMIN' ) ) / log( base ) /three,KIND=ilp) ) + + sminv1 = one / small1 + small2 = small1*small1 + sminv2 = sminv1*sminv1 + ! determine if scaling of inputs necessary to avoid overflow + ! when computing 1/temp**3 + if( orgati ) then + temp = min( abs( d( 2 )-tau ), abs( d( 3 )-tau ) ) + else + temp = min( abs( d( 1 )-tau ), abs( d( 2 )-tau ) ) + end if + scale = .false. + if( temp<=small1 ) then + scale = .true. + if( temp<=small2 ) then + ! scale up by power of radix nearest 1/safmin**(2/3) + sclfac = sminv2 + sclinv = small2 + else + ! scale up by power of radix nearest 1/safmin**(1/3) + sclfac = sminv1 + sclinv = small1 + end if + ! scaling up safe because d, z, tau scaled elsewhere to be o(1) + do i = 1, 3 + dscale( i ) = d( i )*sclfac + zscale( i ) = z( i )*sclfac + end do + tau = tau*sclfac + lbd = lbd*sclfac + ubd = ubd*sclfac + else + ! copy d and z to dscale and zscale + do i = 1, 3 + dscale( i ) = d( i ) + zscale( i ) = z( i ) + end do + end if + fc = zero + df = zero + ddf = zero + do i = 1, 3 + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + fc = fc + temp1 / dscale( i ) + df = df + temp2 + ddf = ddf + temp3 + end do + f = finit + tau*fc + if( abs( f )<=zero )go to 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + ! iteration begins -- use gragg-thornton-warner cubic convergent + ! scheme + ! it is not hard to see that + ! 1) iterations will go up monotonically + ! if finit < 0; + ! 2) iterations will go down monotonically + ! if finit > 0. + iter = niter + 1 + loop_50: do niter = iter, maxit + if( orgati ) then + temp1 = dscale( 2 ) - tau + temp2 = dscale( 3 ) - tau + else + temp1 = dscale( 1 ) - tau + temp2 = dscale( 2 ) - tau + end if + a = ( temp1+temp2 )*f - temp1*temp2*df + b = temp1*temp2*f + c = f - ( temp1+temp2 )*df + temp1*temp2*ddf + temp = max( abs( a ), abs( b ), abs( c ) ) + a = a / temp + b = b / temp + c = c / temp + if( c==zero ) then + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + if( f*eta>=zero ) then + eta = -f / df + end if + tau = tau + eta + if( tau < lbd .or. tau > ubd )tau = ( lbd + ubd )/two + fc = zero + erretm = zero + df = zero + ddf = zero + do i = 1, 3 + if ( ( dscale( i )-tau )/=zero ) then + temp = one / ( dscale( i )-tau ) + temp1 = zscale( i )*temp + temp2 = temp1*temp + temp3 = temp2*temp + temp4 = temp1 / dscale( i ) + fc = fc + temp4 + erretm = erretm + abs( temp4 ) + df = df + temp2 + ddf = ddf + temp3 + else + go to 60 + end if + end do + f = finit + tau*fc + erretm = eight*( abs( finit )+abs( tau )*erretm ) +abs( tau )*df + if( ( abs( f )<=four*eps*erretm ) .or.( (ubd-lbd)<=four*eps*abs(tau) ) )go to & + 60 + if( f <= zero )then + lbd = tau + else + ubd = tau + end if + end do loop_50 + info = 1 + 60 continue + ! undo scaling + if( scale )tau = tau*sclinv + return + end subroutine stdlib_slaed6 + + !> SLAGS2: computes 2-by-2 orthogonal matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**T *A*Q = U**T *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**T*B*Q = V**T *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**T *A*Q = U**T *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**T*B*Q = V**T*( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> The rows of the transformed A and B are parallel, where + !> U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) + !> ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) + !> Z**T denotes the transpose of Z. + + pure subroutine stdlib_slags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(sp), intent(in) :: a1, a2, a3, b1, b2, b3 + real(sp), intent(out) :: csq, csu, csv, snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(sp) :: a, aua11, aua12, aua21, aua22, avb11, avb12, avb21, avb22, csl, csr, d, s1,& + s2, snl, snr, ua11r, ua22r, vb11r, vb22r, b, c, r, ua11, ua12, ua21, ua22, vb11, vb12,& + vb21, vb22 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_slasv2( a, b, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,2) element of |u|**t *|a| and |v|**t *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + snr*b3 + aua12 = abs( csl )*abs( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**t *a and v**t *b + if( ( abs( ua11r )+abs( ua12 ) )/=zero ) then + if( aua12 / ( abs( ua11r )+abs( ua12 ) )<=avb12 /( abs( vb11r )+abs( vb12 ) ) & + ) then + call stdlib_slartg( -ua11r, ua12, csq, snq, r ) + else + call stdlib_slartg( -vb11r, vb12, csq, snq, r ) + end if + else + call stdlib_slartg( -vb11r, vb12, csq, snq, r ) + end if + csu = csl + snu = -snl + csv = csr + snv = -snr + else + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,2) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snl*a1 + ua22 = -snl*a2 + csl*a3 + vb21 = -snr*b1 + vb22 = -snr*b2 + csr*b3 + aua22 = abs( snl )*abs( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua21 )+abs( ua22 ) )/=zero ) then + if( aua22 / ( abs( ua21 )+abs( ua22 ) )<=avb22 /( abs( vb21 )+abs( vb22 ) ) ) & + then + call stdlib_slartg( -ua21, ua22, csq, snq, r ) + else + call stdlib_slartg( -vb21, vb22, csq, snq, r ) + end if + else + call stdlib_slartg( -vb21, vb22, csq, snq, r ) + end if + csu = snl + snu = csl + csv = snr + snv = csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + ! the svd of real 2-by-2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_slasv2( a, c, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**t *a and v**t *b, + ! and (2,1) element of |u|**t *|a| and |v|**t *|b|. + ua21 = -snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs( b2 ) + ! zero (2,1) elements of u**t *a and v**t *b. + if( ( abs( ua21 )+abs( ua22r ) )/=zero ) then + if( aua21 / ( abs( ua21 )+abs( ua22r ) )<=avb21 /( abs( vb21 )+abs( vb22r ) ) & + ) then + call stdlib_slartg( ua22r, ua21, csq, snq, r ) + else + call stdlib_slartg( vb22r, vb21, csq, snq, r ) + end if + else + call stdlib_slartg( vb22r, vb21, csq, snq, r ) + end if + csu = csr + snu = -snr + csv = csl + snv = -snl + else + ! compute the (1,1) and (1,2) elements of u**t *a and v**t *b, + ! and (1,1) element of |u|**t *|a| and |v|**t *|b|. + ua11 = csr*a1 + snr*a2 + ua12 = snr*a3 + vb11 = csl*b1 + snl*b2 + vb12 = snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs( b2 ) + ! zero (1,1) elements of u**t*a and v**t*b, and then swap. + if( ( abs( ua11 )+abs( ua12 ) )/=zero ) then + if( aua11 / ( abs( ua11 )+abs( ua12 ) )<=avb11 /( abs( vb11 )+abs( vb12 ) ) ) & + then + call stdlib_slartg( ua12, ua11, csq, snq, r ) + else + call stdlib_slartg( vb12, vb11, csq, snq, r ) + end if + else + call stdlib_slartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = csr + csv = snl + snv = csl + end if + end if + return + end subroutine stdlib_slags2 + + !> SLAGTF: factorizes the matrix (T - lambda*I), where T is an n by n + !> tridiagonal matrix and lambda is a scalar, as + !> T - lambda*I = PLU, + !> where P is a permutation matrix, L is a unit lower tridiagonal matrix + !> with at most one non-zero sub-diagonal elements per column and U is + !> an upper triangular matrix with at most two non-zero super-diagonal + !> elements per column. + !> The factorization is obtained by Gaussian elimination with partial + !> pivoting and implicit row scaling. + !> The parameter LAMBDA is included in the routine so that SLAGTF may + !> be used, in conjunction with SLAGTS, to obtain eigenvectors of T by + !> inverse iteration. + + pure subroutine stdlib_slagtf( n, a, lambda, b, c, tol, d, in, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: lambda, tol + ! Array Arguments + integer(ilp), intent(out) :: in(*) + real(sp), intent(inout) :: a(*), b(*), c(*) + real(sp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(sp) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'SLAGTF', -info ) + return + end if + if( n==0 )return + a( 1 ) = a( 1 ) - lambda + in( n ) = 0 + if( n==1 ) then + if( a( 1 )==zero )in( 1 ) = 1 + return + end if + eps = stdlib_slamch( 'EPSILON' ) + tl = max( tol, eps ) + scale1 = abs( a( 1 ) ) + abs( b( 1 ) ) + loop_10: do k = 1, n - 1 + a( k+1 ) = a( k+1 ) - lambda + scale2 = abs( c( k ) ) + abs( a( k+1 ) ) + if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) ) + if( a( k )==zero ) then + piv1 = zero + else + piv1 = abs( a( k ) ) / scale1 + end if + if( c( k )==zero ) then + in( k ) = 0 + piv2 = zero + scale1 = scale2 + if( k<( n-1 ) )d( k ) = zero + else + piv2 = abs( c( k ) ) / scale2 + if( piv2<=piv1 ) then + in( k ) = 0 + scale1 = scale2 + c( k ) = c( k ) / a( k ) + a( k+1 ) = a( k+1 ) - c( k )*b( k ) + if( k<( n-1 ) )d( k ) = zero + else + in( k ) = 1 + mult = a( k ) / c( k ) + a( k ) = c( k ) + temp = a( k+1 ) + a( k+1 ) = b( k ) - mult*temp + if( k<( n-1 ) ) then + d( k ) = b( k+1 ) + b( k+1 ) = -mult*d( k ) + end if + b( k ) = temp + c( k ) = mult + end if + end if + if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0 ) )in( n ) = k + end do loop_10 + if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0 ) )in( n ) = n + return + end subroutine stdlib_slagtf + + !> SLAGTS: may be used to solve one of the systems of equations + !> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, + !> where T is an n by n tridiagonal matrix, for x, following the + !> factorization of (T - lambda*I) as + !> (T - lambda*I) = P*L*U , + !> by routine SLAGTF. The choice of equation to be solved is + !> controlled by the argument JOB, and in each case there is an option + !> to perturb zero or very small diagonal elements of U, this option + !> being intended for use in applications such as inverse iteration. + + pure subroutine stdlib_slagts( job, n, a, b, c, d, in, y, tol, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: job, n + real(sp), intent(inout) :: tol + ! Array Arguments + integer(ilp), intent(in) :: in(*) + real(sp), intent(in) :: a(*), b(*), c(*), d(*) + real(sp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: k + real(sp) :: absak, ak, bignum, eps, pert, sfmin, temp + ! Intrinsic Functions + intrinsic :: abs,max,sign + ! Executable Statements + info = 0 + if( ( abs( job )>2 ) .or. ( job==0 ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLAGTS', -info ) + return + end if + if( n==0 )return + eps = stdlib_slamch( 'EPSILON' ) + sfmin = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / sfmin + if( job<0 ) then + if( tol<=zero ) then + tol = abs( a( 1 ) ) + if( n>1 )tol = max( tol, abs( a( 2 ) ), abs( b( 1 ) ) ) + do k = 3, n + tol = max( tol, abs( a( k ) ), abs( b( k-1 ) ),abs( d( k-2 ) ) ) + end do + tol = tol*eps + if( tol==zero )tol = eps + end if + end if + if( abs( job )==1 ) then + do k = 2, n + if( in( k-1 )==0 ) then + y( k ) = y( k ) - c( k-1 )*y( k-1 ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + if( job==1 ) then + loop_30: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_30 + else + loop_50: do k = n, 1, -1 + if( k<=n-2 ) then + temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 ) + else if( k==n-1 ) then + temp = y( k ) - b( k )*y( k+1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 40 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 40 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 40 + end if + end if + y( k ) = temp / ak + end do loop_50 + end if + else + ! come to here if job = 2 or -2 + if( job==2 ) then + loop_60: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + absak = abs( ak ) + if( absakabsak )then + info = k + return + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + info = k + return + end if + end if + y( k ) = temp / ak + end do loop_60 + else + loop_80: do k = 1, n + if( k>=3 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 ) + else if( k==2 ) then + temp = y( k ) - b( k-1 )*y( k-1 ) + else + temp = y( k ) + end if + ak = a( k ) + pert = sign( tol, ak ) + 70 continue + absak = abs( ak ) + if( absakabsak )then + ak = ak + pert + pert = 2*pert + go to 70 + else + temp = temp*bignum + ak = ak*bignum + end if + else if( abs( temp )>absak*bignum ) then + ak = ak + pert + pert = 2*pert + go to 70 + end if + end if + y( k ) = temp / ak + end do loop_80 + end if + do k = n, 2, -1 + if( in( k-1 )==0 ) then + y( k-1 ) = y( k-1 ) - c( k-1 )*y( k ) + else + temp = y( k-1 ) + y( k-1 ) = y( k ) + y( k ) = temp - c( k-1 )*y( k ) + end if + end do + end if + end subroutine stdlib_slagts + + !> SLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then SLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**T gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**T and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ alpha ] + !> [ gamma ] + !> where alpha = x**T*w. + + pure subroutine stdlib_slaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(sp), intent(out) :: c, s, sestpr + real(sp), intent(in) :: gamma, sest + ! Array Arguments + real(sp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(sp) :: absalp, absest, absgam, alpha, b, cosine, eps, norma, s1, s2, sine, t, & + test, tmp, zeta1, zeta2 + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + eps = stdlib_slamch( 'EPSILON' ) + alpha = stdlib_sdot( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + s = sqrt( one+tmp*tmp ) + sestpr = s2*s + c = ( gamma / s2 ) / s + s = sign( one, alpha ) / s + else + tmp = s2 / s1 + c = sqrt( one+tmp*tmp ) + sestpr = s1*c + s = ( alpha / s1 ) / c + c = sign( one, gamma ) / c + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = c / ( b+sqrt( b*b+c ) ) + else + t = sqrt( b*b+c ) - b + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -gamma + cosine = alpha + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = sqrt( s*s+c*c ) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + c = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / c ) + s = -( gamma / s2 ) / c + c = sign( one, alpha ) / c + else + tmp = s2 / s1 + s = sqrt( one+tmp*tmp ) + sestpr = absest / s + c = ( alpha / s1 ) / s + s = -sign( one, gamma ) / s + end if + return + else + ! normal case + zeta1 = alpha / absest + zeta2 = gamma / absest + norma = max( one+zeta1*zeta1+abs( zeta1*zeta2 ),abs( zeta1*zeta2 )+zeta2*zeta2 ) + + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = c / ( b+sqrt( abs( b*b-c ) ) ) + sine = zeta1 / ( one-t ) + cosine = -zeta2 / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = -c / ( b+sqrt( b*b+c ) ) + else + t = b - sqrt( b*b+c ) + end if + sine = -zeta1 / t + cosine = -zeta2 / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = sqrt( sine*sine+cosine*cosine ) + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_slaic1 + + !> SLANEG: computes the Sturm count, the number of negative pivots + !> encountered while factoring tridiagonal T - sigma I = L D L^T. + !> This implementation works directly on the factors without forming + !> the tridiagonal matrix T. The Sturm count is also the number of + !> eigenvalues of T less than sigma. + !> This routine is called from SLARRB. + !> The current routine does not use the PIVMIN parameter but rather + !> requires IEEE-754 propagation of Infinities and NaNs. This + !> routine also has no input range restrictions but does require + !> default exception handling such that x/0 produces Inf when x is + !> non-zero, and Inf/Inf produces NaN. For more information, see: + !> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in + !> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on + !> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 + !> (Tech report version in LAWN 172 with the same title.) + + pure integer(ilp) function stdlib_slaneg( n, d, lld, sigma, pivmin, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, r + real(sp), intent(in) :: pivmin, sigma + ! Array Arguments + real(sp), intent(in) :: d(*), lld(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: blklen = 128 + + ! some architectures propagate infinities and nans very slowly, so + ! the code computes counts in blklen chunks. then a nan can + ! propagate at most blklen columns before being detected. this is + ! not a general tuning parameter; it needs only to be just large + ! enough that the overhead is tiny in common cases. + + ! Local Scalars + integer(ilp) :: bj, j, neg1, neg2, negcnt + real(sp) :: bsav, dminus, dplus, gamma, p, t, tmp + logical(lk) :: sawnan + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + negcnt = 0 + ! i) upper part: l d l^t - sigma i = l+ d+ l+^t + t = -sigma + loop_210: do bj = 1, r-1, blklen + neg1 = 0 + bsav = t + do j = bj, min(bj+blklen-1, r-1) + dplus = d( j ) + t + if( dplus SLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(sp) function stdlib_slangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(sp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value SLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real matrix A. + + real(sp) function stdlib_slange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value SLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real tridiagonal matrix A. + + pure real(sp) function stdlib_slangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_slassq( n-1, dl, 1, scale, sum ) + call stdlib_slassq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_slangt = anorm + return + end function stdlib_slangt + + !> SLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(sp) function stdlib_slanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_slassq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_slanhs = value + return + end function stdlib_slanhs + + !> SLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(sp) function stdlib_slansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_slassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_slassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_slassq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_slansb = value + return + end function stdlib_slansb + + !> SLANSF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A in RFP format. + + real(sp) function stdlib_slansf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: a(0:*) + real(sp), intent(out) :: work(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(sp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_slansf = zero + return + else if( n==1 ) then + stdlib_slansf = abs( a(0) ) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='t or 't' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'T' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + do j = 0, k - 1 + do i = 0, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n + do j = 0, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + do j = 0, k - 1 + do i = 0, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + else + ! xpose case; a is k by n+1 + do j = 0, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + if( ifm==1 ) then + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( a( 0+j*lda ) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( a( i+j*lda ) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( a( i+j*lda ) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( a( 0+j*lda ) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( a( i+j*lda ) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work ( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( a( 0 ) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( a( i+j*lda ) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( a( i+j*lda ) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_sisnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_slassq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_slassq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k-1, a( k ), lda+1, scale, s ) + ! tri l at a(k,0) + call stdlib_slassq( k, a( k-1 ), lda+1, scale, s ) + ! tri u at a(k-1,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_slassq( n-j-1, a( j+1+j*lda ), 1, scale, s ) + ! trap l at a(0,0) + end do + do j = 0, k - 2 + call stdlib_slassq( j, a( 0+( 1+j )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( 0 ), lda+1, scale, s ) + ! tri l at a(0,0) + call stdlib_slassq( k-1, a( 0+lda ), lda+1, scale, s ) + ! tri u at a(0,1) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 2 + call stdlib_slassq( j, a( 0+( k+j )*lda ), 1, scale, s ) + ! u at a(0,k) + end do + do j = 0, k - 2 + call stdlib_slassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_slassq( k-j-1, a( j+1+( j+k-1 )*lda ), 1,scale, s ) + ! l at a(0,k-1) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k-1, a( 0+k*lda ), lda+1, scale, s ) + ! tri u at a(0,k) + call stdlib_slassq( k, a( 0+( k-1 )*lda ), lda+1, scale, s ) + ! tri l at a(0,k-1) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_slassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + do j = k, n - 1 + call stdlib_slassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k-1 rect. at a(0,k) + end do + do j = 0, k - 3 + call stdlib_slassq( k-j-2, a( j+2+j*lda ), 1, scale, s ) + ! l at a(1,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + call stdlib_slassq( k-1, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 2 + call stdlib_slassq( k-j-1, a( k+j+2+j*lda ), 1, scale, s ) + ! l at a(k+1,0) + end do + do j = 0, k - 1 + call stdlib_slassq( k+j, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( k+1 ), lda+1, scale, s ) + ! tri l at a(k+1,0) + call stdlib_slassq( k, a( k ), lda+1, scale, s ) + ! tri u at a(k,0) + else + ! ilu=1 + do j = 0, k - 1 + call stdlib_slassq( n-j-1, a( j+2+j*lda ), 1, scale, s ) + ! trap l at a(1,0) + end do + do j = 1, k - 1 + call stdlib_slassq( j, a( 0+j*lda ), 1, scale, s ) + ! u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( 1 ), lda+1, scale, s ) + ! tri l at a(1,0) + call stdlib_slassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + else + ! a is xpose + if( ilu==0 ) then + ! a**t is upper + do j = 1, k - 1 + call stdlib_slassq( j, a( 0+( k+1+j )*lda ), 1, scale, s ) + ! u at a(0,k+1) + end do + do j = 0, k - 1 + call stdlib_slassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,0) + end do + do j = 0, k - 2 + call stdlib_slassq( k-j-1, a( j+1+( j+k )*lda ), 1, scale,s ) + ! l at a(0,k) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( 0+( k+1 )*lda ), lda+1, scale, s ) + ! tri u at a(0,k+1) + call stdlib_slassq( k, a( 0+k*lda ), lda+1, scale, s ) + ! tri l at a(0,k) + else + ! a**t is lower + do j = 1, k - 1 + call stdlib_slassq( j, a( 0+( j+1 )*lda ), 1, scale, s ) + ! u at a(0,1) + end do + do j = k + 1, n + call stdlib_slassq( k, a( 0+j*lda ), 1, scale, s ) + ! k by k rect. at a(0,k+1) + end do + do j = 0, k - 2 + call stdlib_slassq( k-j-1, a( j+1+j*lda ), 1, scale, s ) + ! l at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + call stdlib_slassq( k, a( lda ), lda+1, scale, s ) + ! tri l at a(0,1) + call stdlib_slassq( k, a( 0 ), lda+1, scale, s ) + ! tri u at a(0,0) + end if + end if + end if + value = scale*sqrt( s ) + end if + stdlib_slansf = value + return + end function stdlib_slansf + + !> SLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A, supplied in packed form. + + real(sp) function stdlib_slansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_slassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_slassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( ap( k )/=zero ) then + absa = abs( ap( k ) ) + if( scale SLANST: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric tridiagonal matrix A. + + pure real(sp) function stdlib_slanst( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_sisnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_slassq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_slassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_slanst = anorm + return + end function stdlib_slanst + + !> SLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> real symmetric matrix A. + + real(sp) function stdlib_slansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_slassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_slassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_slassq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_slansy = value + return + end function stdlib_slansy + + !> SLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(sp) function stdlib_slantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(sp), intent(in) :: ab(ldab,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_slassq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_slassq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_slassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_slassq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_slantb = value + return + end function stdlib_slantb + + !> SLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(sp) function stdlib_slantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_slassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_slassq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_slassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_slassq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_slantp = value + return + end function stdlib_slantp + + !> SLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(sp) function stdlib_slantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(sp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_sisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_slassq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_slassq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_slassq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_slassq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_slantr = value + return + end function stdlib_slantr + + !> SLAORHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a real general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine SORHR_COL. In SORHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine SLAORHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_slaorhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_slaorhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_slaorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_strsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_slaorhr_col_getrfnp + + !> SLAPY2: returns sqrt(x**2+y**2), taking care not to cause unnecessary + !> overflow and unnecessary underflow. + + pure real(sp) function stdlib_slapy2( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: x, y + ! ===================================================================== + + + ! Local Scalars + real(sp) :: w, xabs, yabs, z, hugeval + logical(lk) :: x_is_nan, y_is_nan + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + x_is_nan = stdlib_sisnan( x ) + y_is_nan = stdlib_sisnan( y ) + if ( x_is_nan ) stdlib_slapy2 = x + if ( y_is_nan ) stdlib_slapy2 = y + hugeval = stdlib_slamch( 'OVERFLOW' ) + if ( .not.( x_is_nan.or.y_is_nan ) ) then + xabs = abs( x ) + yabs = abs( y ) + w = max( xabs, yabs ) + z = min( xabs, yabs ) + if( z==zero .or. w>hugeval ) then + stdlib_slapy2 = w + else + stdlib_slapy2 = w*sqrt( one+( z / w )**2 ) + end if + end if + return + end function stdlib_slapy2 + + !> Given a 3-by-3 matrix pencil (A,B), SLAQZ1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). + !> It is assumed that either + !> 1) sr1 = sr2 + !> or + !> 2) si = 0. + !> This is useful for starting double implicit shift bulges + !> in the QZ algorithm. + + pure subroutine stdlib_slaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) + ! arguments + integer(ilp), intent( in ) :: lda, ldb + real(sp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1, sr2, si,beta1, beta2 + real(sp), intent( out ) :: v( * ) + + ! local scalars + real(sp) :: w(2), safmin, safmax, scale1, scale2 + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + ! calculate first shifted vector + w( 1 ) = beta1*a( 1, 1 )-sr1*b( 1, 1 ) + w( 2 ) = beta1*a( 2, 1 )-sr1*b( 2, 1 ) + scale1 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale1 >= safmin .and. scale1 <= safmax ) then + w( 1 ) = w( 1 )/scale1 + w( 2 ) = w( 2 )/scale1 + end if + ! solve linear system + w( 2 ) = w( 2 )/b( 2, 2 ) + w( 1 ) = ( w( 1 )-b( 1, 2 )*w( 2 ) )/b( 1, 1 ) + scale2 = sqrt( abs( w( 1 ) ) ) * sqrt( abs( w( 2 ) ) ) + if( scale2 >= safmin .and. scale2 <= safmax ) then + w( 1 ) = w( 1 )/scale2 + w( 2 ) = w( 2 )/scale2 + end if + ! apply second shift + v( 1 ) = beta2*( a( 1, 1 )*w( 1 )+a( 1, 2 )*w( 2 ) )-sr2*( b( 1,1 )*w( 1 )+b( 1, 2 )*w(& + 2 ) ) + v( 2 ) = beta2*( a( 2, 1 )*w( 1 )+a( 2, 2 )*w( 2 ) )-sr2*( b( 2,1 )*w( 1 )+b( 2, 2 )*w(& + 2 ) ) + v( 3 ) = beta2*( a( 3, 1 )*w( 1 )+a( 3, 2 )*w( 2 ) )-sr2*( b( 3,1 )*w( 1 )+b( 3, 2 )*w(& + 2 ) ) + ! account for imaginary part + v( 1 ) = v( 1 )+si*si*b( 1, 1 )/scale1/scale2 + ! check for overflow + if( abs( v( 1 ) )>safmax .or. abs( v( 2 ) ) > safmax .or.abs( v( 3 ) )>safmax .or. & + stdlib_sisnan( v( 1 ) ) .or.stdlib_sisnan( v( 2 ) ) .or. stdlib_sisnan( v( 3 ) ) ) & + then + v( 1 ) = zero + v( 2 ) = zero + v( 3 ) = zero + end if + end subroutine stdlib_slaqz1 + + !> SLAQZ2: chases a 2x2 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_slaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + ! local variables + real(sp) :: h(2,3), c1, s1, c2, s2, temp + if( k+2 == ihi ) then + ! shift is located on the edge of the matrix, remove it + h = b( ihi-1:ihi, ihi-2:ihi ) + ! make h upper triangular + call stdlib_slartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_srot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + call stdlib_slartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_srot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_slartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + call stdlib_srot( ihi-istartm+1, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_srot( ihi-istartm+1, b( istartm, ihi-1 ), 1, b( istartm,ihi-2 ), 1, c2, & + s2 ) + b( ihi-1, ihi-2 ) = zero + b( ihi, ihi-2 ) = zero + call stdlib_srot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + call stdlib_srot( ihi-istartm+1, a( istartm, ihi-1 ), 1, a( istartm,ihi-2 ), 1, c2, & + s2 ) + if ( ilz ) then + call stdlib_srot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + call stdlib_srot( nz, z( 1, ihi-1-zstart+1 ), 1, z( 1,ihi-2-zstart+1 ), 1, c2, & + s2 ) + end if + call stdlib_slartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) + a( ihi-1, ihi-2 ) = temp + a( ihi, ihi-2 ) = zero + call stdlib_srot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & + ) + call stdlib_srot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & + ) + if ( ilq ) then + call stdlib_srot( nq, q( 1, ihi-1-qstart+1 ), 1, q( 1, ihi-qstart+1 ), 1, c1, s1 & + ) + end if + call stdlib_slartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = zero + call stdlib_srot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c1, s1 ) + + call stdlib_srot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c1, & + s1 ) + if ( ilz ) then + call stdlib_srot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c1, s1 & + ) + end if + else + ! normal operation, move bulge down + h = b( k+1:k+2, k:k+2 ) + ! make h upper triangular + call stdlib_slartg( h( 1, 1 ), h( 2, 1 ), c1, s1, temp ) + h( 2, 1 ) = zero + h( 1, 1 ) = temp + call stdlib_srot( 2, h( 1, 2 ), 2, h( 2, 2 ), 2, c1, s1 ) + ! calculate z1 and z2 + call stdlib_slartg( h( 2, 3 ), h( 2, 2 ), c1, s1, temp ) + call stdlib_srot( 1, h( 1, 3 ), 1, h( 1, 2 ), 1, c1, s1 ) + call stdlib_slartg( h( 1, 2 ), h( 1, 1 ), c2, s2, temp ) + ! apply transformations from the right + call stdlib_srot( k+3-istartm+1, a( istartm, k+2 ), 1, a( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_srot( k+3-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c2, s2 ) + + call stdlib_srot( k+2-istartm+1, b( istartm, k+2 ), 1, b( istartm,k+1 ), 1, c1, s1 ) + + call stdlib_srot( k+2-istartm+1, b( istartm, k+1 ), 1, b( istartm,k ), 1, c2, s2 ) + + if ( ilz ) then + call stdlib_srot( nz, z( 1, k+2-zstart+1 ), 1, z( 1, k+1-zstart+1 ), 1, c1, s1 ) + + call stdlib_srot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c2, s2 ) + + end if + b( k+1, k ) = zero + b( k+2, k ) = zero + ! calculate q1 and q2 + call stdlib_slartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) + a( k+2, k ) = temp + a( k+3, k ) = zero + call stdlib_slartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) + a( k+1, k ) = temp + a( k+2, k ) = zero + ! apply transformations from the left + call stdlib_srot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) + call stdlib_srot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) + call stdlib_srot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) + call stdlib_srot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) + if ( ilq ) then + call stdlib_srot( nq, q( 1, k+2-qstart+1 ), 1, q( 1, k+3-qstart+1 ), 1, c1, s1 ) + + call stdlib_srot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c2, s2 ) + + end if + end if + end subroutine stdlib_slaqz2 + + !> SLAQZ4: Executes a single multishift QZ sweep + + pure subroutine stdlib_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, & + si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_desired, ldqc, ldzc + real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( & + ldqc, * ), zc( ldzc, * ), work( * ), sr( * ),si( * ), ss( * ) + integer(ilp), intent( out ) :: info + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(sp) :: temp, v(3), c1, s1, c2, s2, swap + info = 0 + if ( nblock_desired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_desired + return + else if ( lwork < n*nblock_desired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLAQZ4', -info ) + return + end if + ! executable statements + if ( nshifts < 2 ) then + return + end if + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ! shuffle shifts into pairs of real shifts and pairs + ! of complex conjugate shifts assuming complex + ! conjugate shifts are already adjacent to one + ! another + do i = 1, nshifts-2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + swap = ss( i ) + ss( i ) = ss( i+1 ) + ss( i+1 ) = ss( i+2 ) + ss( i+2 ) = swap + end if + end do + ! nshfts is supposed to be even, but if it is odd, + ! then simply reduce it by one. the shuffle above + ! ensures that the dropped shift is real and that + ! the remaining shifts are paired. + ns = nshifts-mod( nshifts, 2 ) + npos = max( nblock_desired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_slaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) + call stdlib_slaset( 'FULL', ns, ns, zero, one, zc, ldzc ) + do i = 1, ns, 2 + ! introduce the shift + call stdlib_slaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & + i ), ss( i ), ss( i+1 ), v ) + temp = v( 2 ) + call stdlib_slartg( temp, v( 3 ), c1, s1, v( 2 ) ) + call stdlib_slartg( v( 1 ), v( 2 ), c2, s2, temp ) + call stdlib_srot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) + call stdlib_srot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) + call stdlib_srot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) + call stdlib_srot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) + call stdlib_srot( ns+1, qc( 1, 2 ), 1, qc( 1, 3 ), 1, c1, s1 ) + call stdlib_srot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c2, s2 ) + ! chase the shift down + do j = 1, ns-1-i + call stdlib_slaqz2( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & + ), lda, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & + ), ldb, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_sgemm( 'N', 'N', n, sheight, sheight, one, q( 1, ilo ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_slacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & + zc, ldzc, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & + zc, ldzc, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_sgemm( 'N', 'N', n, swidth, swidth, one, z( 1, ilo ), ldz,zc, ldzc, & + zero, work, n ) + call stdlib_slacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_slaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) + call stdlib_slaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -2 + do j = 0, np-1 + ! move down the block with index k+i+j-1, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_slaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& + ns+np ), lda, zero, work,sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& + ns+np ), ldb, zero, work,sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_sgemm( 'N', 'N', n, nblock, nblock, one, q( 1, k+1 ),ldq, qc, ldqc, & + zero, work, n ) + call stdlib_slacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & + zc, ldzc, zero, work,sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & + zc, ldzc, zero, work,sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_sgemm( 'N', 'N', n, nblock, nblock, one, z( 1, k ),ldz, zc, ldzc, & + zero, work, n ) + call stdlib_slacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_slaset( 'FULL', ns, ns, zero, one, qc, ldqc ) + call stdlib_slaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns, 2 + ! chase the shift down to the bottom right corner + do ishift = ihi-i-1, ihi-2 + call stdlib_slaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_sgemm( 'N', 'N', n, ns, ns, one, q( 1, ihi-ns+1 ), ldq,qc, ldqc, zero, & + work, n ) + call stdlib_slacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& + zc, ldzc, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& + zc, ldzc, zero, work, sheight ) + call stdlib_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_sgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1, ihi-ns ), ldz, zc,ldzc, zero, & + work, n ) + call stdlib_slacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_slaqz4 + + !> SLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_slar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(sp), intent(in) :: gaptol, lambda, pivmin + real(sp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(sp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: z(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(sp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + eps = stdlib_slamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus SLARFG: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, and x is an (n-1)-element real + !> vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + !> Otherwise 1 <= tau <= 2. + + pure subroutine stdlib_slarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(inout) :: alpha + real(sp), intent(out) :: tau + ! Array Arguments + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(sp) :: beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=1 ) then + tau = zero + return + end if + xnorm = stdlib_snrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_slapy2( alpha, xnorm ), alpha ) + safmin = stdlib_slamch( 'S' ) / stdlib_slamch( 'E' ) + knt = 0 + if( abs( beta ) SLARFGP: generates a real elementary reflector H of order n, such + !> that + !> H * ( alpha ) = ( beta ), H**T * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is non-negative, and x is + !> an (n-1)-element real vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**T ) , + !> ( v ) + !> where tau is a real scalar and v is a real (n-1)-element + !> vector. + !> If the elements of x are all zero, then tau = 0 and H is taken to be + !> the unit matrix. + + subroutine stdlib_slarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(sp), intent(inout) :: alpha + real(sp), intent(out) :: tau + ! Array Arguments + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(sp) :: beta, bignum, savealpha, smlnum, xnorm + ! Intrinsic Functions + intrinsic :: abs,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_snrm2( n-1, x, incx ) + if( xnorm==zero ) then + ! h = [+/-1, 0; i], sign chosen so alpha >= 0. + if( alpha>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + alpha = -alpha + end if + else + ! general case + beta = sign( stdlib_slapy2( alpha, xnorm ), alpha ) + smlnum = stdlib_slamch( 'S' ) / stdlib_slamch( 'E' ) + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = 0 + end do + beta = -savealpha + end if + else + ! this is the general case. + call stdlib_sscal( n-1, one / alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_slarfgp + + !> SLARNV: returns a vector of n random real numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_slarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + real(sp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(sp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_sp + + + + ! Local Scalars + integer(ilp) :: i, il, il2, iv + ! Local Arrays + real(sp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cos,log,min,sqrt + ! Executable Statements + do 40 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + if( idist==3 ) then + il2 = 2*il + else + il2 = il + end if + ! call stdlib_slaruv to generate il2 numbers from a uniform (0,1) + ! distribution (il2 <= lv) + call stdlib_slaruv( iseed, il2, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = u( i ) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = two*u( i ) - one + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*cos( twopi*u( 2*i ) ) + end do + end if + 40 continue + return + end subroutine stdlib_slarnv + + !> Given the relatively robust representation(RRR) L D L^T, SLARRB: + !> does "limited" bisection to refine the eigenvalues of L D L^T, + !> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial + !> guesses for these eigenvalues are input in W, the corresponding estimate + !> of the error in these guesses and their gaps are input in WERR + !> and WGAP, respectively. During bisection, intervals + !> [left, right] are maintained by storing their mid-points and + !> semi-widths in the arrays W and WERR respectively. + + pure subroutine stdlib_slarrb( n, d, lld, ifirst, ilast, rtol1,rtol2, offset, w, wgap, werr, & + work, iwork,pivmin, spdiam, twist, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ifirst, ilast, n, offset, twist + integer(ilp), intent(out) :: info + real(sp), intent(in) :: pivmin, rtol1, rtol2, spdiam + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: d(*), lld(*) + real(sp), intent(inout) :: w(*), werr(*), wgap(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + integer(ilp) :: maxitr + ! Local Scalars + integer(ilp) :: i, i1, ii, ip, iter, k, negcnt, next, nint, olnint, prev, r + real(sp) :: back, cvrgd, gap, left, lgap, mid, mnwdth, rgap, right, tmp, width + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + maxitr = int( ( log( spdiam+pivmin )-log( pivmin ) ) /log( two ),KIND=ilp) + 2 + mnwdth = two * pivmin + r = twist + if((r<1).or.(r>n)) r = n + ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ]. + ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while + ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 ) + ! for an unconverged interval is set to the index of the next unconverged + ! interval, and is -1 or 0 for a converged interval. thus a linked + ! list of unconverged intervals is set up. + i1 = ifirst + ! the number of unconverged intervals + nint = 0 + ! the last unconverged interval found + prev = 0 + rgap = wgap( i1-offset ) + loop_75: do i = i1, ilast + k = 2*i + ii = i - offset + left = w( ii ) - werr( ii ) + right = w( ii ) + werr( ii ) + lgap = rgap + rgap = wgap( ii ) + gap = min( lgap, rgap ) + ! make sure that [left,right] contains the desired eigenvalue + ! compute negcount from dstqds facto l+d+l+^t = l d l^t - left + ! do while( negcnt(left)>i-1 ) + back = werr( ii ) + 20 continue + negcnt = stdlib_slaneg( n, d, lld, left, pivmin, r ) + if( negcnt>i-1 ) then + left = left - back + back = two*back + go to 20 + end if + ! do while( negcnt(right)=i1).and.(i<=ilast)) iwork( 2*prev-1 ) = i + 1 + else + ! unconverged interval found + prev = i + nint = nint + 1 + iwork( k-1 ) = i + 1 + iwork( k ) = negcnt + end if + work( k-1 ) = left + work( k ) = right + end do loop_75 + ! do while( nint>0 ), i.e. there are still unconverged intervals + ! and while (iter1) lgap = wgap( ii-1 ) + gap = min( lgap, rgap ) + next = iwork( k-1 ) + left = work( k-1 ) + right = work( k ) + mid = half*( left + right ) + ! semiwidth of interval + width = right - mid + tmp = max( abs( left ), abs( right ) ) + cvrgd = max(rtol1*gap,rtol2*tmp) + if( ( width<=cvrgd ) .or. ( width<=mnwdth ).or.( iter==maxitr ) )then + ! reduce number of unconverged intervals + nint = nint - 1 + ! mark interval as converged. + iwork( k-1 ) = 0 + if( i1==i ) then + i1 = next + else + ! prev holds the last unconverged interval previously examined + if(prev>=i1) iwork( 2*prev-1 ) = next + end if + i = next + cycle loop_100 + end if + prev = i + ! perform one bisection step + negcnt = stdlib_slaneg( n, d, lld, mid, pivmin, r ) + if( negcnt<=i-1 ) then + work( k-1 ) = mid + else + work( k ) = mid + end if + i = next + end do loop_100 + iter = iter + 1 + ! do another loop if there are still unconverged intervals + ! however, in the last iteration, all intervals are accepted + ! since this is the best we can do. + if( ( nint>0 ).and.(iter<=maxitr) ) go to 80 + ! at this point, all the intervals have converged + do i = ifirst, ilast + k = 2*i + ii = i - offset + ! all intervals marked by '0' have been refined. + if( iwork( k-1 )==0 ) then + w( ii ) = half*( work( k-1 )+work( k ) ) + werr( ii ) = work( k ) - w( ii ) + end if + end do + do i = ifirst+1, ilast + k = 2*i + ii = i - offset + wgap( ii-1 ) = max( zero,w(ii) - werr (ii) - w( ii-1 ) - werr( ii-1 )) + end do + return + end subroutine stdlib_slarrb + + !> Given the initial representation L D L^T and its cluster of close + !> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... + !> W( CLEND ), SLARRF: finds a new relatively robust representation + !> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the + !> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. + + pure subroutine stdlib_slarrf( n, d, l, ld, clstrt, clend,w, wgap, werr,spdiam, clgapl, & + clgapr, pivmin, sigma,dplus, lplus, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: clstrt, clend, n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: clgapl, clgapr, pivmin, spdiam + real(sp), intent(out) :: sigma + ! Array Arguments + real(sp), intent(in) :: d(*), l(*), ld(*), w(*), werr(*) + real(sp), intent(out) :: dplus(*), lplus(*), work(*) + real(sp), intent(inout) :: wgap(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: quart = 0.25_sp + real(sp), parameter :: maxgrowth1 = 8._sp + real(sp), parameter :: maxgrowth2 = 8._sp + integer(ilp), parameter :: ktrymax = 1 + integer(ilp), parameter :: sleft = 1 + integer(ilp), parameter :: sright = 2 + + ! Local Scalars + logical(lk) :: dorrr1, forcer, nofail, sawnan1, sawnan2, tryrrr1 + integer(ilp) :: i, indx, ktry, shift + real(sp) :: avgap, bestshift, clwdth, eps, fact, fail, fail2, growthbound, ldelta, & + ldmax, lsigma, max1, max2, mingap, oldp, prod, rdelta, rdmax, rrr1, rrr2, rsigma, s, & + smlgrowth, tmp, znm2 + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + fact = real(2**ktrymax,KIND=sp) + eps = stdlib_slamch( 'PRECISION' ) + shift = 0 + forcer = .false. + ! note that we cannot guarantee that for any of the shifts tried, + ! the factorization has a small or even moderate element growth. + ! there could be ritz values at both ends of the cluster and despite + ! backing off, there are examples where all factorizations tried + ! (in ieee mode, allowing zero pivots + ! element growth. + ! for this reason, we should use pivmin in this subroutine so that at + ! least the l d l^t factorization exists. it can be checked afterwards + ! whether the element growth caused bad residuals/orthogonality. + ! decide whether the code should accept the best among all + ! representations despite large element growth or signal info=1 + ! setting nofail to .false. for quick fix for bug 113 + nofail = .false. + ! compute the average gap length of the cluster + clwdth = abs(w(clend)-w(clstrt)) + werr(clend) + werr(clstrt) + avgap = clwdth / real(clend-clstrt,KIND=sp) + mingap = min(clgapl, clgapr) + ! initial values for shifts to both ends of cluster + lsigma = min(w( clstrt ),w( clend )) - werr( clstrt ) + rsigma = max(w( clstrt ),w( clend )) + werr( clend ) + ! use a small fudge to make sure that we really shift to the outside + lsigma = lsigma - abs(lsigma)* two * eps + rsigma = rsigma + abs(rsigma)* two * eps + ! compute upper bounds for how much to back off the initial shifts + ldmax = quart * mingap + two * pivmin + rdmax = quart * mingap + two * pivmin + ldelta = max(avgap,wgap( clstrt ))/fact + rdelta = max(avgap,wgap( clend-1 ))/fact + ! initialize the record of the best representation found + s = stdlib_slamch( 'S' ) + smlgrowth = one / s + fail = real(n-1,KIND=sp)*mingap/(spdiam*eps) + fail2 = real(n-1,KIND=sp)*mingap/(spdiam*sqrt(eps)) + bestshift = lsigma + ! while (ktry <= ktrymax) + ktry = 0 + growthbound = maxgrowth1*spdiam + 5 continue + sawnan1 = .false. + sawnan2 = .false. + ! ensure that we do not back off too much of the initial shifts + ldelta = min(ldmax,ldelta) + rdelta = min(rdmax,rdelta) + ! compute the element growth when shifting to both ends of the cluster + ! accept the shift if there is no element growth at one of the two ends + ! left end + s = -lsigma + dplus( 1 ) = d( 1 ) + s + if(abs(dplus(1)) SLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by SLARRE. + + pure subroutine stdlib_slarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: minrgp, pivmin, vl, vu + real(sp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(sp), intent(in) :: gers(*) + real(sp), intent(out) :: work(*) + real(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + real(sp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indwrk= 3*n+1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = one + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_scopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + call stdlib_scopy( in, z( ibegin, j ), 1, d( ibegin ), 1 ) + call stdlib_scopy( in-1, z( ibegin, j+1 ), 1, l( ibegin ),1 ) + sigma = z( iend, j+1 ) + ! set the corresponding entries in z to zero + call stdlib_slaset( 'FULL', in, 2, zero, zero,z( ibegin, j), ldz ) + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_slarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_slarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_slarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_slarrf needs lwork = 2*n + call stdlib_slarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,z(ibegin, newftt),z(ibegin, newftt+1),work( indwrk ), & + iinfo ) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_slarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = ssigma + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=sp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_slarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_slar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_sscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex SLASCL: multiplies the M by N real matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_slascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(sp), intent(in) :: cfrom, cto + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(sp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_sisnan(cfrom) ) then + info = -4 + else if( stdlib_sisnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower half of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper half of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_slascl + + !> This subroutine computes the square root of the I-th updated + !> eigenvalue of a positive symmetric rank-one modification to + !> a positive diagonal matrix whose entries are given as the squares + !> of the corresponding entries in the array d, and that + !> 0 <= D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) * diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_slasd4( n, i, d, z, delta, rho, sigma, work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(sp), intent(in) :: rho + real(sp), intent(out) :: sigma + ! Array Arguments + real(sp), intent(in) :: d(*), z(*) + real(sp), intent(out) :: delta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 400 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3, geomavg + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(sp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & + dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & + temp, temp1, temp2, w + ! Local Arrays + real(sp) :: dd(3), zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + sigma = sqrt( d( 1 )*d( 1 )+rho*z( 1 )*z( 1 ) ) + delta( 1 ) = one + work( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_slasd5( i, d, z, delta, rho, sigma, work ) + return + end if + ! compute machine epsilon + eps = stdlib_slamch( 'EPSILON' ) + rhoinv = one / rho + tau2= zero + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + temp = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) + do j = 1, n + work( j ) = d( j ) + d( n ) + temp1 + delta( j ) = ( d( j )-d( n ) ) - temp1 + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& + *work( n ) ) + if( w<=zero ) then + temp1 = sqrt( d( n )*d( n )+rho ) + temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& + temp1 ) ) ) +z( n )*z( n ) / rho + ! the following tau2 is to approximate + ! sigma_n^2 - d( n )*d( n ) + if( c<=temp ) then + tau = rho + else + delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) + a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*delsq + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp>rho )eta = rho + dtnsq + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + ! main loop to update the values of the array delta + iter = niter + 1 + loop_90: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + ! calculate the new step + dtnsq1 = work( n-1 )*delta( n-1 ) + dtnsq = work( n )*delta( n ) + c = w - dtnsq1*dpsi - dtnsq*dphi + a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) + b = dtnsq1*dtnsq*w + if( a>=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = eta - dtnsq + if( temp<=zero )eta = eta / two + eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) + tau = tau + eta + sigma = sigma + eta + do j = 1, n + delta( j ) = delta( j ) - eta + work( j ) = work( j ) + eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, ii + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + tau2 = work( n )*delta( n ) + temp = z( n ) / tau2 + phi = z( n )*temp + dphi = temp*temp + erretm = eight*( -phi-psi ) + erretm - phi + rhoinv + ! $ + abs( tau2 )*( dpsi+dphi ) + w = rhoinv + phi + psi + end do loop_90 + ! return with info = 1, niter = maxit and not converged + info = 1 + go to 240 + ! end for the case i = n + else + ! the case for i < n + niter = 1 + ip1 = i + 1 + ! calculate initial guess + delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) + delsq2 = delsq / two + sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) + temp = delsq2 / ( d( i )+sq2 ) + do j = 1, n + work( j ) = d( j ) + d( i ) + temp + delta( j ) = ( d( j )-d( i ) ) - temp + end do + psi = zero + do j = 1, i - 1 + psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + phi = zero + do j = n, i + 2, -1 + phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) + end do + c = rhoinv + psi + phi + w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& + *delta( ip1 ) ) + geomavg = .false. + if( w>zero ) then + ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + ! we choose d(i) as origin. + orgati = .true. + ii = i + sglb = zero + sgub = delsq2 / ( d( i )+sq2 ) + a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*delsq + if( a>zero ) then + tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + ! tau2 now is an estimation of sigma^2 - d( i )^2. the + ! following, however, is the corresponding estimation of + ! sigma - d( i ). + tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) + temp = sqrt(eps) + if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then + tau = min( ten*d(i), sgub ) + geomavg = .true. + end if + else + ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + ! we choose d(i+1) as origin. + orgati = .false. + ii = ip1 + sglb = -delsq2 / ( d( ii )+sq2 ) + sgub = zero + a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*delsq + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / ( work( ii )*delta( ii ) ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & + iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp = tau + eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + swtch = .false. + if( orgati ) then + if( -w>abs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + ! main loop to update the values of the array delta and work + iter = niter + 1 + loop_230: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then + go to 240 + end if + if( w<=zero ) then + sglb = max( sglb, tau ) + else + sgub = min( sgub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + dtipsq = work( ip1 )*delta( ip1 ) + dtisq = work( i )*delta( i ) + if( .not.swtch ) then + if( orgati ) then + c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2 + else + c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2 + end if + else + temp = z( ii ) / ( work( ii )*delta( ii ) ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - dtisq*dpsi - dtipsq*dphi + end if + a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw + b = dtipsq*dtisq*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) + end if + else + a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + dtiim = work( iim1 )*delta( iim1 ) + dtiip = work( iip1 )*delta( iip1 ) + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - dtiim*dpsi - dtiip*dphi + zz( 1 ) = dtiim*dtiim*dpsi + zz( 3 ) = dtiip*dtiip*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / dtiim + temp1 = temp1*temp1 + temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 + c = temp - dtiip*( dpsi+dphi ) - temp2 + zz( 1 ) = z( iim1 )*z( iim1 ) + if( dpsi 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) + temp=tau+eta + if( temp>sgub .or. temp zero ) then + eta = sqrt(sgub*tau)-tau + end if + else + if( sglb > zero ) then + eta = sqrt(sglb*tau)-tau + end if + end if + end if + end if + prew = w + tau = tau + eta + sigma = sigma + eta + do j = 1, n + work( j ) = work( j ) + eta + delta( j ) = delta( j ) - eta + end do + ! evaluate psi and the derivative dpsi + dpsi = zero + psi = zero + erretm = zero + do j = 1, iim1 + temp = z( j ) / ( work( j )*delta( j ) ) + psi = psi + z( j )*temp + dpsi = dpsi + temp*temp + erretm = erretm + psi + end do + erretm = abs( erretm ) + ! evaluate phi and the derivative dphi + dphi = zero + phi = zero + do j = n, iip1, -1 + temp = z( j ) / ( work( j )*delta( j ) ) + phi = phi + z( j )*temp + dphi = dphi + temp*temp + erretm = erretm + phi + end do + tau2 = work( ii )*delta( ii ) + temp = z( ii ) / tau2 + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = rhoinv + phi + psi + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) + ! $ + abs( tau2 )*dw + if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_230 + ! return with info = 1, niter = maxit and not converged + info = 1 + end if + 240 continue + return + end subroutine stdlib_slasd4 + + !> SLASD7: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. There + !> are two ways in which deflation can occur: when two or more singular + !> values are close together or if there is a tiny entry in the Z + !> vector. For each such occurrence the order of the related + !> secular equation problem is reduced by one. + !> SLASD7 is called from SLASD6. + + pure subroutine stdlib_slasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & + beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(sp), intent(in) :: alpha, beta + real(sp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(sp), intent(inout) :: d(*), vf(*), vl(*) + real(sp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) + + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(sp) :: eps, hlftol, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcoln )go to 90 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_slapy2( c, s ) + z( j ) = tau + z( jprev ) = zero + c = c / tau + s = -s / tau + ! record the appropriate givens rotation + if( icompq==1 ) then + givptr = givptr + 1 + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + givcol( givptr, 2 ) = idxjp + givcol( givptr, 1 ) = idxj + givnum( givptr, 2 ) = c + givnum( givptr, 1 ) = s + end if + call stdlib_srot( 1, vf( jprev ), 1, vf( j ), 1, c, s ) + call stdlib_srot( 1, vl( jprev ), 1, vl( j ), 1, c, s ) + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 80 + 90 continue + ! record the last singular value. + k = k + 1 + zw( k ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 100 continue + ! sort the singular values into dsigma. the singular values which + ! were not deflated go into the first k slots of dsigma, except + ! that dsigma(1) is treated separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + vfw( j ) = vf( jp ) + vlw( j ) = vl( jp ) + end do + if( icompq==1 ) then + do j = 2, n + jp = idxp( j ) + perm( j ) = idxq( idx( jp )+1 ) + if( perm( j )<=nlp1 ) then + perm( j ) = perm( j ) - 1 + end if + end do + end if + ! the deflated singular values go back into the last n - k slots of + ! d. + call stdlib_scopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and + ! vl(m). + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_slapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = -z( m ) / z( 1 ) + end if + call stdlib_srot( 1, vf( m ), 1, vf( 1 ), 1, c, s ) + call stdlib_srot( 1, vl( m ), 1, vl( 1 ), 1, c, s ) + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! restore z, vf, and vl. + call stdlib_scopy( k-1, zw( 2 ), 1, z( 2 ), 1 ) + call stdlib_scopy( n-1, vfw( 2 ), 1, vf( 2 ), 1 ) + call stdlib_scopy( n-1, vlw( 2 ), 1, vl( 2 ), 1 ) + return + end subroutine stdlib_slasd7 + + !> SLASD8: finds the square roots of the roots of the secular equation, + !> as defined by the values in DSIGMA and Z. It makes the appropriate + !> calls to SLASD4, and stores, for each element in D, the distance + !> to its two nearest poles (elements in DSIGMA). It also updates + !> the arrays VF and VL, the first and last components of all the + !> right singular vectors of the original bidiagonal matrix. + !> SLASD8 is called from SLASD6. + + pure subroutine stdlib_slasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, k, lddifr + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) + real(sp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j + real(sp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( k<1 ) then + info = -2 + else if( lddifr SLASQ3: checks for deflation, computes a shift (TAU) and calls dqds. + !> In case of failure it changes shifts, and tries again until output + !> is positive. + + pure subroutine stdlib_slasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ieee + integer(ilp), intent(in) :: i0 + integer(ilp), intent(inout) :: iter, n0, ndiv, nfail, pp + real(sp), intent(inout) :: desig, dmin1, dmin2, dn, dn1, dn2, g, qmax, tau + real(sp), intent(out) :: dmin, sigma + ! Array Arguments + real(sp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: cbias = 1.50_sp + real(sp), parameter :: qurtr = 0.250_sp + real(sp), parameter :: hundrd = 100.0_sp + + + ! Local Scalars + integer(ilp) :: ipn4, j4, n0in, nn + integer(ilp), intent(inout) :: ttype + real(sp) :: eps, s, t, temp, tol, tol2 + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + n0in = n0 + eps = stdlib_slamch( 'PRECISION' ) + tol = eps*hundrd + tol2 = tol**2 + ! check for deflation. + 10 continue + if( n0tol2*( sigma+z( nn-3 ) ) .and.z( nn-2*pp-4 )>tol2*z( nn-7 ) )go to & + 30 + 20 continue + z( 4*n0-3 ) = z( 4*n0+pp-3 ) + sigma + n0 = n0 - 1 + go to 10 + ! check whether e(n0-2) is negligible, 2 eigenvalues. + 30 continue + if( z( nn-9 )>tol2*sigma .and.z( nn-2*pp-8 )>tol2*z( nn-11 ) )go to 50 + 40 continue + if( z( nn-3 )>z( nn-7 ) ) then + s = z( nn-3 ) + z( nn-3 ) = z( nn-7 ) + z( nn-7 ) = s + end if + t = half*( ( z( nn-7 )-z( nn-3 ) )+z( nn-5 ) ) + if( z( nn-5 )>z( nn-3 )*tol2.and.t/=zero ) then + s = z( nn-3 )*( z( nn-5 ) / t ) + if( s<=t ) then + s = z( nn-3 )*( z( nn-5 ) /( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( nn-3 )*( z( nn-5 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( nn-7 ) + ( s+z( nn-5 ) ) + z( nn-3 ) = z( nn-3 )*( z( nn-7 ) / t ) + z( nn-7 ) = t + end if + z( 4*n0-7 ) = z( nn-7 ) + sigma + z( 4*n0-3 ) = z( nn-3 ) + sigma + n0 = n0 - 2 + go to 10 + 50 continue + if( pp==2 )pp = 0 + ! reverse the qd-array, if warranted. + if( dmin<=zero .or. n0 0. + 70 continue + call stdlib_slasq5( i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2, dn,dn1, dn2, ieee, & + eps ) + ndiv = ndiv + ( n0-i0+2 ) + iter = iter + 1 + ! check status. + if( dmin>=zero .and. dmin1>=zero ) then + ! success. + go to 90 + else if( dminzero .and.z( 4*( n0-1 )-pp )

0 )info = ierr + call stdlib_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( is, jsp1 ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( is, jsp1 ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_saxpy( js-1, rhs( 1 ), b( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_saxpy( js-1, rhs( 2 ), b( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + call stdlib_saxpy( js-1, rhs( 3 ), e( 1, js ), 1,f( is, 1 ), ldf ) + + call stdlib_saxpy( js-1, rhs( 4 ), e( 1, jsp1 ), 1,f( is, 1 ), ldf ) + + end if + if( i

0 )info = ierr + call stdlib_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( is, js ) = rhs( 1 ) + c( isp1, js ) = rhs( 2 ) + f( is, js ) = rhs( 3 ) + f( isp1, js ) = rhs( 4 ) + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_sger( mb, js-1, one, rhs( 1 ), 1, b( 1, js ),1, f( is, 1 ), & + ldf ) + call stdlib_sger( mb, js-1, one, rhs( 3 ), 1, e( 1, js ),1, f( is, 1 ), & + ldf ) + end if + if( i

0 )info = ierr + call stdlib_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + k = 1 + ii = mb*nb + 1 + do jj = 0, nb - 1 + call stdlib_scopy( mb, rhs( k ), 1, c( is, js+jj ), 1 ) + call stdlib_scopy( mb, rhs( ii ), 1, f( is, js+jj ), 1 ) + k = k + mb + ii = ii + mb + end do + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( j>p+2 ) then + call stdlib_sgemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1, & + js ), ldb, one,f( is, 1 ), ldf ) + call stdlib_sgemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1, & + js ), lde, one,f( is, 1 ), ldf ) + end if + if( i

STGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with real entries. (A, D) and (B, E) must be in + !> generalized (real) Schur canonical form, i.e. A, B are upper quasi + !> triangular and D, E are upper triangular. + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale b, where + !> Z is defined as + !> Z = [ kron(In, A) -kron(B**T, Im) ] (2) + !> [ kron(In, D) -kron(E**T, Im) ]. + !> Here Ik is the identity matrix of size k and X**T is the transpose of + !> X. kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'T', STGSYL solves the transposed system Z**T*y = scale*b, + !> which is equivalent to solve for R and L in + !> A**T * R + D**T * L = scale * C (3) + !> R * B**T + L * E**T = scale * -F + !> This case (TRANS = 'T') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using SLACON. + !> If IJOB >= 1, STGSYL computes a Frobenius norm-based estimate + !> of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. See [1-2] for more + !> information. + !> This is a level 3 BLAS algorithm. + + pure subroutine stdlib_stgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + real(sp), intent(inout) :: c(ldc,*), f(ldf,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_scopy by calls to stdlib_slaset. + ! sven hammarling, 1/5/02. + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, ppqq, pq, q + real(sp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: max,real,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_slaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_slaset( 'F', m, n, zero, zero, f, ldf ) + else if( ijob>=1 .and. notran ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + loop_30: do iround = 1, isolve + ! use unblocked level 2 solver + dscale = zero + dsum = one + pq = 0 + call stdlib_stgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,iwork, pq, info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_slacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_slacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_slaset( 'F', m, n, zero, zero, c, ldc ) + call stdlib_slaset( 'F', m, n, zero, zero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_slacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_slacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + if( a( i, i-1 )/=zero )i = i + 1 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + if( b( j, j-1 )/=zero )j = j + 1 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j)-subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1,..., 1; j = 1, 2,..., q + dscale = zero + dsum = one + pq = 0 + scale = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + ppqq = 0 + call stdlib_stgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo ) + if( linfo>0 )info = linfo + pq = pq + ppqq + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_sscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_sscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_sscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining + ! equation. + if( i>1 ) then + call stdlib_sgemm( 'N', 'N', is-1, nb, mb, -one,a( 1, is ), lda, c( is, & + js ), ldc, one,c( 1, js ), ldc ) + call stdlib_sgemm( 'N', 'N', is-1, nb, mb, -one,d( 1, is ), ldd, c( is, & + js ), ldc, one,f( 1, js ), ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_sscal( is-1, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( is-1, scaloc, f( 1, k ), 1 ) + end do + do k = js, je + call stdlib_sscal( m-ie, scaloc, c( ie+1, k ), 1 ) + call stdlib_sscal( m-ie, scaloc, f( ie+1, k ), 1 ) + end do + do k = je + 1, n + call stdlib_sscal( m, scaloc, c( 1, k ), 1 ) + call stdlib_sscal( m, scaloc, f( 1, k ), 1 ) + end do + scale = scale*scaloc + end if + ! substitute r(i, j) and l(i, j) into remaining equation. + if( j>p+2 ) then + call stdlib_sgemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1, js )& + , ldb, one, f( is, 1 ),ldf ) + call stdlib_sgemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1, js )& + , lde, one, f( is, 1 ),ldf ) + end if + if( i

STPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_stpcon( norm, uplo, diag, n, ap, rcond, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: ap(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'STPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_slamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=sp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_slantp( norm, uplo, diag, n, ap, work ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_slacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_slatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_slatps( uplo, 'TRANSPOSE', diag, normin, n, ap,work, scale, work( & + 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_isamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale STPLQT2: computes a LQ a factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_stplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda STPMLQT: applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_stpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + real(sp), intent(in) :: v(ldv,*), t(ldt,*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_stprfb( 'L', 'T', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_stprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_stprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_stprfb( 'R', 'T', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_stpmlqt + + !> STPMQRT: applies a real orthogonal matrix Q obtained from a + !> "triangular-pentagonal" real block reflector H to a general + !> real matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_stpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + real(sp), intent(in) :: v(ldv,*), t(ldt,*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'T' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_stprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_stprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_stprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_stprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_stpmqrt + + !> STPQRT2: computes a QR factorization of a real "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_stpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + real(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda STRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_strcon( norm, uplo, diag, n, a, lda, rcond, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(sp) :: ainvnm, anorm, scale, smlnum, xnorm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_slacn2( n, work( n+1 ), work, iwork, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_slatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + work( 2*n+1 ), info ) + else + ! multiply by inv(a**t). + call stdlib_slatrs( uplo, 'TRANSPOSE', diag, normin, n, a, lda,work, scale, & + work( 2*n+1 ), info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_isamax( n, work, 1 ) + xnorm = abs( work( ix ) ) + if( scale STZRZF: reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A + !> to upper triangular form by means of orthogonal transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_stzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_slarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_slatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_stzrzf + + !> SGBSV: computes the solution to a real system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_sgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb SGBSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_sgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*), c(*), r(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + ! moved setting of info = n+1 so info does not subsequently get + ! overwritten. sven, 17 mar 05. + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(sp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_slantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_slangb( norm, n, kl, ku, ab, ldab, work ) + rpvgrw = stdlib_slantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_slangb( 'M', n, kl, ku, ab, ldab, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_sgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, iwork, info ) + + ! compute the solution matrix x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_sgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_sgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SGEBAL: balances a general real matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_sgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: scale(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: sclfac = 2.0e+0_sp + real(sp), parameter :: factor = 0.95e+0_sp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(sp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,max,min + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + if( stdlib_sisnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'SGEBAL', -info ) + return + end if + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_sscal( n-k+1, g, a( i, k ), lda ) + call stdlib_sscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_sgebal + + !> SGEBD2: reduces a real general m by n matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_sgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + call stdlib_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + a( i, i ) = one + ! apply h(i) to a(i:m,i+1:n) from the left + if( i SGEHD2: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_sgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda SGELQ2: computes an LQ factorization of a real m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_sgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGELQF: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_sgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'SGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb SGELQT3: recursively computes a LQ factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_sgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_slarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_sgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = q1^h a(j1:m,1:n) [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_strmm( 'R', 'U', 'T', 'U', m2, m1, one,a, lda, t( i1, 1 ), ldt ) + call stdlib_sgemm( 'N', 'T', m2, m1, n-m1, one, a( i1, i1 ), lda,a( 1, i1 ), lda, & + one, t( i1, 1 ), ldt) + call stdlib_strmm( 'R', 'U', 'N', 'N', m2, m1, one,t, ldt, t( i1, 1 ), ldt ) + call stdlib_sgemm( 'N', 'N', m2, n-m1, m1, -one, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + one, a( i1, i1 ), lda ) + call stdlib_strmm( 'R', 'U', 'N', 'U', m2, m1 , one,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )=0 + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_sgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_strmm( 'R', 'U', 'T', 'U', m1, m2, one,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_sgemm( 'N', 'T', m1, m2, n-m, one, a( 1, j1 ), lda,a( i1, j1 ), lda, & + one, t( 1, i1 ), ldt ) + call stdlib_strmm( 'L', 'U', 'N', 'N', m1, m2, -one, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_strmm( 'R', 'U', 'N', 'N', m1, m2, one,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_sgelqt3 + + !> SGEQL2: computes a QL factorization of a real m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_sgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGEQLF: computes a QL factorization of a real M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_sgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**t to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_slarfb( 'LEFT', 'TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-1, & + n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_sgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_sgeqlf + + !> SGEQR2: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_sgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGEQR2P: computes a QR factorization of a real m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_sgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGEQRF: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_sgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb SGEQR2P computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_sgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb SGEQRT2: computes a QR factorization of a real M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_sgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i SGEQRT3: recursively computes a QR factorization of a real M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_sgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_slarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_sgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_strmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + call stdlib_sgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1 ), lda,a( j1, j1 ), lda, & + one, t( 1, j1 ), ldt) + call stdlib_strmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1, j1 ), ldt ) + call stdlib_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + one, a( j1, j1 ), lda ) + call stdlib_strmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1, j1 ), ldt ) + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_sgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = (a( j+n1, i )) + end do + end do + call stdlib_strmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_sgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1 ), lda,a( i1, j1 ), lda, & + one, t( 1, j1 ), ldt ) + call stdlib_strmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_strmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_sgeqrt3 + + !> SGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_sgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transt + integer(ilp) :: count, i, j, k, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin, xk + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_sgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + call stdlib_saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_sgetrs( transt, n, 1, af, ldaf, ipiv, work( n+1 ),n, info ) + + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_sgetrs( trans, n, 1, af, ldaf, ipiv, work( n+1 ), n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_sgerfs + + !> SGERQ2: computes an RQ factorization of a real m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_sgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda SGERQF: computes an RQ factorization of a real M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_sgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_slarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_slarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_sgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_sgerqf + + !> SGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_sgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_sgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_sgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_slaswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_slaswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_strsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, & + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_sgetrf + + !> SGGHD3: reduces a pair of real matrices (A,B) to generalized upper + !> Hessenberg form using orthogonal transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the orthogonal matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**T*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**T*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**T*x. + !> The orthogonal matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T + !> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T + !> If Q1 is the orthogonal matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then SGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of SGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_sgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(sp) :: c, c1, c2, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'SGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = real( lwkopt,KIND=sp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_slaset( 'LOWER', n-1, n-1, zero, zero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = one + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'SGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_slaset( 'ALL', nblst, nblst, zero, one, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_slaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_slartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = c + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + c = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = c*temp - s*b( i-1, jj ) + b( i-1, jj ) = s*temp + c*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + call stdlib_srot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, a( & + j+1+i, j ),-b( j+1+i, j ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated orthogonal + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_sgemv( 'TRANSPOSE', nblst, len, one, work,nblst, a( jrow, j+1 )& + , 1, zero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_strmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_sgemv( 'TRANSPOSE', len, nblst-len, one,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, one,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated orthogonal + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_strmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_strmv( 'LOWER', 'TRANSPOSE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_sgemv( 'TRANSPOSE', nnb, len, one,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,one, work( pw ), 1 ) + call stdlib_sgemv( 'TRANSPOSE', len, nnb, one,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, one,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated orthogonal matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', nblst,cola, nblst, one, work, & + nblst,a( j, jcol+nnb ), lda, zero, work( pw ),nblst ) + call stdlib_slacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_sorm22( 'LEFT', 'TRANSPOSE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, one, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, zero, work( pw ),2*nnb ) + call stdlib_slacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated orthogonal matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, q( & + topq, j ), ldq,work, nblst, zero, work( pw ), nh ) + call stdlib_slacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_sorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + q( topq, j ), ldq,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_slacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small orthogonal factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_slaset( 'ALL', nblst, nblst, zero, one, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_slaset( 'ALL', 2*nnb, 2*nnb, zero, one,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + c = a( i, j ) + a( i, j ) = zero + s = b( i, j ) + b( i, j ) = zero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = c*temp - s*work( jj ) + work( jj ) = s*temp + c*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_slaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,a( jcol + 2, & + jcol ), lda ) + call stdlib_slaset( 'LOWER', ihi - jcol - 1, nnb, zero, zero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated orthogonal matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, a( & + 1, j ), lda,work, nblst, zero, work( pw ), top ) + call stdlib_slacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_sorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, a( 1, j ), lda,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_slacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, one, b( & + 1, j ), ldb,work, nblst, zero, work( pw ), top ) + call stdlib_slacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_sorm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + one, b( 1, j ), ldb,work( ppwo ), 2*nnb, zero,work( pw ), top ) + call stdlib_slacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated orthogonal matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, one, z( & + topq, j ), ldz,work, nblst, zero, work( pw ), nh ) + call stdlib_slacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_sorm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, one,& + z( topq, j ), ldz,work( ppwo ), 2*nnb, zero, work( pw ),nh ) + call stdlib_slacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol SGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**T*(inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_sggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'SGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'SGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'SORMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda SGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**T + !> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the + !> transpose of the matrix Z. + + pure subroutine stdlib_sggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'SGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'SGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'SORMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p)*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda SGTCON: estimates the reciprocal of the condition number of a real + !> tridiagonal matrix A using the LU factorization as computed by + !> SGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_sgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(sp), intent(in) :: anorm + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: d(*), dl(*), du(*), du2(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(sp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm SGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_sgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + real(sp), intent(out) :: berr(*), ferr(*), work(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(sp) :: eps, lstres, s, safe1, safe2, safmin + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_sgttrs( trans, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, info ) + + call stdlib_saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_slacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**t). + call stdlib_sgttrs( transt, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( n+i ) = work( i )*work( n+i ) + end do + call stdlib_sgttrs( transn, n, 1, dlf, df, duf, du2, ipiv,work( n+1 ), n, & + info ) + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_sgtrfs + + !> SGTSVX: uses the LU factorization to compute the solution to a real + !> system of linear equations A * X = B or A**T * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_sgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + real(sp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_scopy( n-1, dl, 1, dlf, 1 ) + call stdlib_scopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_sgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_slangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_sgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,iwork, info ) + + ! compute the solution vectors x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_sgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_sgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SHGEQZ: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by SGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_shgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alphar, alphai, & + beta, q, ldq, z, ldz, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), work(*) + real(sp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: safety = 1.0e+2_sp + ! $ safety = one ) + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilpivt, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(sp) :: a11, a12, a1i, a1r, a21, a22, a2i, a2r, ad11, ad11l, ad12, ad12l, ad21, & + ad21l, ad22, ad22l, ad32l, an, anorm, ascale, atol, b11, b1a, b1i, b1r, b22, b2a, b2i, & + b2r, bn, bnorm, bscale, btol, c, c11i, c11r, c12, c21, c22i, c22r, cl, cq, cr, cz, & + eshift, s, s1, s1inv, s2, safmax, safmin, scale, sl, sqi, sqr, sr, szi, szr, t1, tau, & + temp, temp2, tempi, tempr, u1, u12, u12l, u2, ulp, vs, w11, w12, w21, w22, wabs, wi, & + wr, wr2 + ! Local Arrays + real(sp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 80 + else + ifirst = jch + 1 + go to 110 + end if + end if + t( jch+1, jch+1 ) = zero + end do + go to 70 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + temp = t( jch, jch+1 ) + call stdlib_slartg( temp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = zero + if( jchilast )ifrstm = ilo + end if + go to 350 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 110 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute single shifts. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10==iiter ) then + ! exceptional shift. chosen for no particularly good reason. + ! (single shift only.) + if( ( real( maxit,KIND=sp)*safmin )*abs( h( ilast, ilast-1 ) ) abs( (wr2/s2)*t( & + ilast, ilast )- h( ilast, ilast ) ) ) then + temp = wr + wr = wr2 + wr2 = temp + temp = s1 + s1 = s2 + s2 = temp + end if + temp = max( s1, safmin*max( one, abs( wr ), abs( wi ) ) ) + if( wi/=zero )go to 200 + end if + ! fiddle with shift to avoid overflow + temp = min( ascale, one )*( half*safmax ) + if( s1>temp ) then + scale = temp / s1 + else + scale = one + end if + temp = min( bscale, one )*( half*safmax ) + if( abs( wr )>temp )scale = min( scale, temp / abs( wr ) ) + s1 = scale*s1 + wr = scale*wr + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + temp = abs( s1*h( j, j-1 ) ) + temp2 = abs( s1*h( j, j )-wr*t( j, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + temp = h( j, j-1 ) + call stdlib_slartg( temp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = zero + end if + do jc = j, ilastm + temp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -s*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = temp + temp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -s*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = temp2 + end do + if( ilq ) then + do jr = 1, n + temp = c*q( jr, j ) + s*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = temp + end do + end if + temp = t( j+1, j+1 ) + call stdlib_slartg( temp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = zero + do jr = ifrstm, min( j+2, ilast ) + temp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -s*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = temp + end do + do jr = ifrstm, j + temp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -s*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = temp + end do + if( ilz ) then + do jr = 1, n + temp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -s*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = temp + end do + end if + end do loop_190 + go to 350 + ! use francis double-shift + ! note: the francis double-shift should work with real shifts, + ! but only if the block is at least 3x3. + ! this code may break if this point is reached with + ! a 2x2 block with real eigenvalues. + 200 continue + if( ifirst+1==ilast ) then + ! special case -- 2x2 block with complex eigenvectors + ! step 1: standardize, that is, rotate so that + ! ( b11 0 ) + ! b = ( ) with b11 non-negative. + ! ( 0 b22 ) + call stdlib_slasv2( t( ilast-1, ilast-1 ), t( ilast-1, ilast ),t( ilast, ilast ),& + b22, b11, sr, cr, sl, cl ) + if( b11 unfl ) + ! __ + ! (sa - wb) ( cz -sz ) + ! ( sz cz ) + c11r = s1*a11 - wr*b11 + c11i = -wi*b11 + c12 = s1*a12 + c21 = s1*a21 + c22r = s1*a22 - wr*b22 + c22i = -wi*b22 + if( abs( c11r )+abs( c11i )+abs( c12 )>abs( c21 )+abs( c22r )+abs( c22i ) ) & + then + t1 = stdlib_slapy3( c12, c11r, c11i ) + cz = c12 / t1 + szr = -c11r / t1 + szi = -c11i / t1 + else + cz = stdlib_slapy2( c22r, c22i ) + if( cz<=safmin ) then + cz = zero + szr = one + szi = zero + else + tempr = c22r / cz + tempi = c22i / cz + t1 = stdlib_slapy2( cz, c21 ) + cz = cz / t1 + szr = -c21*tempr / t1 + szi = c21*tempi / t1 + end if + end if + ! compute givens rotation on left + ! ( cq sq ) + ! ( __ ) a or b + ! ( -sq cq ) + an = abs( a11 ) + abs( a12 ) + abs( a21 ) + abs( a22 ) + bn = abs( b11 ) + abs( b22 ) + wabs = abs( wr ) + abs( wi ) + if( s1*an>wabs*bn ) then + cq = cz*b11 + sqr = szr*b22 + sqi = -szi*b22 + else + a1r = cz*a11 + szr*a12 + a1i = szi*a12 + a2r = cz*a21 + szr*a22 + a2i = szi*a22 + cq = stdlib_slapy2( a1r, a1i ) + if( cq<=safmin ) then + cq = zero + sqr = one + sqi = zero + else + tempr = a1r / cq + tempi = a1i / cq + sqr = tempr*a2r + tempi*a2i + sqi = tempi*a2r - tempr*a2i + end if + end if + t1 = stdlib_slapy3( cq, sqr, sqi ) + cq = cq / t1 + sqr = sqr / t1 + sqi = sqi / t1 + ! compute diagonal elements of qbz + tempr = sqr*szr - sqi*szi + tempi = sqr*szi + sqi*szr + b1r = cq*cz*b11 + tempr*b22 + b1i = tempi*b22 + b1a = stdlib_slapy2( b1r, b1i ) + b2r = cq*cz*b22 + tempr*b11 + b2i = -tempi*b11 + b2a = stdlib_slapy2( b2r, b2i ) + ! normalize so beta > 0, and im( alpha1 ) > 0 + beta( ilast-1 ) = b1a + beta( ilast ) = b2a + alphar( ilast-1 ) = ( wr*b1a )*s1inv + alphai( ilast-1 ) = ( wi*b1a )*s1inv + alphar( ilast ) = ( wr*b2a )*s1inv + alphai( ilast ) = -( wi*b2a )*s1inv + ! step 3: go to next block -- exit if finished. + ilast = ifirst - 1 + if( ilastilast )ifrstm = ilo + end if + go to 350 + else + ! usual case: 3x3 or larger block, using francis implicit + ! double-shift + ! 2 + ! eigenvalue equation is w - c w + d = 0, + ! -1 2 -1 + ! so compute 1st column of (a b ) - c a b + d + ! using the formula in qzit (from eispack) + ! we assume that the block is at least 3x3 + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + u12 = t( ilast-1, ilast ) / t( ilast, ilast ) + ad11l = ( ascale*h( ifirst, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad21l = ( ascale*h( ifirst+1, ifirst ) ) /( bscale*t( ifirst, ifirst ) ) + ad12l = ( ascale*h( ifirst, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad22l = ( ascale*h( ifirst+1, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + ad32l = ( ascale*h( ifirst+2, ifirst+1 ) ) /( bscale*t( ifirst+1, ifirst+1 ) ) + + u12l = t( ifirst, ifirst+1 ) / t( ifirst+1, ifirst+1 ) + v( 1 ) = ( ad11-ad11l )*( ad22-ad11l ) - ad12*ad21 +ad21*u12*ad11l + ( ad12l-& + ad11l*u12l )*ad21l + v( 2 ) = ( ( ad22l-ad11l )-ad21l*u12l-( ad11-ad11l )-( ad22-ad11l )+ad21*u12 )& + *ad21l + v( 3 ) = ad32l*ad21l + istart = ifirst + call stdlib_slarfg( 3, v( 1 ), v( 2 ), 1, tau ) + v( 1 ) = one + ! sweep + loop_290: do j = istart, ilast - 2 + ! all but last elements: use 3x3 householder transforms. + ! zero (j-1)st column of a + if( j>istart ) then + v( 1 ) = h( j, j-1 ) + v( 2 ) = h( j+1, j-1 ) + v( 3 ) = h( j+2, j-1 ) + call stdlib_slarfg( 3, h( j, j-1 ), v( 2 ), 1, tau ) + v( 1 ) = one + h( j+1, j-1 ) = zero + h( j+2, j-1 ) = zero + end if + do jc = j, ilastm + temp = tau*( h( j, jc )+v( 2 )*h( j+1, jc )+v( 3 )*h( j+2, jc ) ) + h( j, jc ) = h( j, jc ) - temp + h( j+1, jc ) = h( j+1, jc ) - temp*v( 2 ) + h( j+2, jc ) = h( j+2, jc ) - temp*v( 3 ) + temp2 = tau*( t( j, jc )+v( 2 )*t( j+1, jc )+v( 3 )*t( j+2, jc ) ) + t( j, jc ) = t( j, jc ) - temp2 + t( j+1, jc ) = t( j+1, jc ) - temp2*v( 2 ) + t( j+2, jc ) = t( j+2, jc ) - temp2*v( 3 ) + end do + if( ilq ) then + do jr = 1, n + temp = tau*( q( jr, j )+v( 2 )*q( jr, j+1 )+v( 3 )*q( jr, j+2 ) ) + + q( jr, j ) = q( jr, j ) - temp + q( jr, j+1 ) = q( jr, j+1 ) - temp*v( 2 ) + q( jr, j+2 ) = q( jr, j+2 ) - temp*v( 3 ) + end do + end if + ! zero j-th column of b (see slagbc for details) + ! swap rows to pivot + ilpivt = .false. + temp = max( abs( t( j+1, j+1 ) ), abs( t( j+1, j+2 ) ) ) + temp2 = max( abs( t( j+2, j+1 ) ), abs( t( j+2, j+2 ) ) ) + if( max( temp, temp2 )=temp2 ) then + w11 = t( j+1, j+1 ) + w21 = t( j+2, j+1 ) + w12 = t( j+1, j+2 ) + w22 = t( j+2, j+2 ) + u1 = t( j+1, j ) + u2 = t( j+2, j ) + else + w21 = t( j+1, j+1 ) + w11 = t( j+2, j+1 ) + w22 = t( j+1, j+2 ) + w12 = t( j+2, j+2 ) + u2 = t( j+1, j ) + u1 = t( j+2, j ) + end if + ! swap columns if nec. + if( abs( w12 )>abs( w11 ) ) then + ilpivt = .true. + temp = w12 + temp2 = w22 + w12 = w11 + w22 = w21 + w11 = temp + w21 = temp2 + end if + ! lu-factor + temp = w21 / w11 + u2 = u2 - temp*u1 + w22 = w22 - temp*w12 + w21 = zero + ! compute scale + scale = one + if( abs( w22 ) SLABRD: reduces the first NB rows and columns of a real general + !> m by n matrix A to upper or lower bidiagonal form by an orthogonal + !> transformation Q**T * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by SGEBRD + + pure subroutine stdlib_slabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_sgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, a( i, 1 ),lda, y( i, 1 ), & + ldy, one, a( i, i ), 1 ) + call stdlib_sgemv( 'NO TRANSPOSE', m-i+1, i-1, -one, x( i, 1 ),ldx, a( 1, i ), 1,& + one, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + call stdlib_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,tauq( i ) ) + + d( i ) = a( i, i ) + if( i SLADIV: performs complex division in real arithmetic + !> a + i*b + !> p + i*q = --------- + !> c + i*d + !> The algorithm is due to Michael Baudin and Robert L. Smith + !> and can be found in the paper + !> "A Robust Complex Division in Scilab" + + pure subroutine stdlib_sladiv( a, b, c, d, p, q ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: a, b, c, d + real(sp), intent(out) :: p, q + ! ===================================================================== + ! Parameters + real(sp), parameter :: bs = 2.0_sp + + + + ! Local Scalars + real(sp) :: aa, bb, cc, dd, ab, cd, s, ov, un, be, eps + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + aa = a + bb = b + cc = c + dd = d + ab = max( abs(a), abs(b) ) + cd = max( abs(c), abs(d) ) + s = one + ov = stdlib_slamch( 'OVERFLOW THRESHOLD' ) + un = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'EPSILON' ) + be = bs / (eps*eps) + if( ab >= half*ov ) then + aa = half * aa + bb = half * bb + s = two * s + end if + if( cd >= half*ov ) then + cc = half * cc + dd = half * dd + s = half * s + end if + if( ab <= un*bs/eps ) then + aa = aa * be + bb = bb * be + s = s / be + end if + if( cd <= un*bs/eps ) then + cc = cc * be + dd = dd * be + s = s * be + end if + if( abs( d )<=abs( c ) ) then + call stdlib_sladiv1(aa, bb, cc, dd, p, q) + else + call stdlib_sladiv1(bb, aa, dd, cc, p, q) + q = -q + end if + p = p * s + q = q * s + return + end subroutine stdlib_sladiv + + !> This subroutine computes the I-th updated eigenvalue of a symmetric + !> rank-one modification to a diagonal matrix whose elements are + !> given in the array d, and that + !> D(i) < D(j) for i < j + !> and that RHO > 0. This is arranged by the calling routine, and is + !> no loss in generality. The rank-one modified system is thus + !> diag( D ) + RHO * Z * Z_transpose. + !> where we assume the Euclidean norm of Z is 1. + !> The method consists of approximating the rational functions in the + !> secular equation by simpler interpolating rational functions. + + pure subroutine stdlib_slaed4( n, i, d, z, delta, rho, dlam, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: i, n + integer(ilp), intent(out) :: info + real(sp), intent(out) :: dlam + real(sp), intent(in) :: rho + ! Array Arguments + real(sp), intent(in) :: d(*), z(*) + real(sp), intent(out) :: delta(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + ! Local Scalars + logical(lk) :: orgati, swtch, swtch3 + integer(ilp) :: ii, iim1, iip1, ip1, iter, j, niter + real(sp) :: a, b, c, del, dltlb, dltub, dphi, dpsi, dw, eps, erretm, eta, midpt, phi, & + prew, psi, rhoinv, tau, temp, temp1, w + ! Local Arrays + real(sp) :: zz(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! since this routine is called in an inner loop, we do no argument + ! checking. + ! quick return for n=1 and 2. + info = 0 + if( n==1 ) then + ! presumably, i=1 upon entry + dlam = d( 1 ) + rho*z( 1 )*z( 1 ) + delta( 1 ) = one + return + end if + if( n==2 ) then + call stdlib_slaed5( i, d, z, delta, rho, dlam ) + return + end if + ! compute machine epsilon + eps = stdlib_slamch( 'EPSILON' ) + rhoinv = one / rho + ! the case i = n + if( i==n ) then + ! initialize some basic variables + ii = n - 1 + niter = 1 + ! calculate initial guess + midpt = rho / two + ! if ||z||_2 is not one, then temp should be set to + ! rho * ||z||_2^2 / two + do j = 1, n + delta( j ) = ( d( j )-d( i ) ) - midpt + end do + psi = zero + do j = 1, n - 2 + psi = psi + z( j )*z( j ) / delta( j ) + end do + c = rhoinv + psi + w = c + z( ii )*z( ii ) / delta( ii ) +z( n )*z( n ) / delta( n ) + if( w<=zero ) then + temp = z( n-1 )*z( n-1 ) / ( d( n )-d( n-1 )+rho ) +z( n )*z( n ) / rho + if( c<=temp ) then + tau = rho + else + del = d( n ) - d( n-1 ) + a = -c*del + z( n-1 )*z( n-1 ) + z( n )*z( n ) + b = z( n )*z( n )*del + if( a=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. temp=zero ) then + eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>zero )eta = -w / ( dpsi+dphi ) + temp = tau + eta + if( temp>dltub .or. tempzero ) then + ! d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + ! we choose d(i) as origin. + orgati = .true. + a = c*del + z( i )*z( i ) + z( ip1 )*z( ip1 ) + b = z( i )*z( i )*del + if( a>zero ) then + tau = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + else + tau = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + end if + dltlb = zero + dltub = midpt + else + ! (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + ! we choose d(i+1) as origin. + orgati = .false. + a = c*del - z( i )*z( i ) - z( ip1 )*z( ip1 ) + b = z( ip1 )*z( ip1 )*del + if( azero )swtch3 = .true. + end if + if( ii==1 .or. ii==n )swtch3 = .false. + temp = z( ii ) / delta( ii ) + dw = dpsi + dphi + temp*temp + temp = z( ii )*temp + w = w + temp + erretm = eight*( phi-psi ) + erretm + two*rhoinv +three*abs( temp ) + abs( tau )& + *dw + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + niter = niter + 1 + if( .not.swtch3 ) then + if( orgati ) then + c = w - delta( ip1 )*dw - ( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + else + a = z( ip1 )*z( ip1 ) + delta( i )*delta( i )*( dpsi+dphi ) + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )*temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )*temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + zz( 2 ) = z( ii )*z( ii ) + call stdlib_slaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempabs( prew ) / ten )swtch = .true. + else + if( w>abs( prew ) / ten )swtch = .true. + end if + tau = tau + eta + ! main loop to update the values of the array delta + iter = niter + 1 + loop_240: do niter = iter, maxit + ! test for convergence + if( abs( w )<=eps*erretm ) then + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + go to 250 + end if + if( w<=zero ) then + dltlb = max( dltlb, tau ) + else + dltub = min( dltub, tau ) + end if + ! calculate the new step + if( .not.swtch3 ) then + if( .not.swtch ) then + if( orgati ) then + c = w - delta( ip1 )*dw -( d( i )-d( ip1 ) )*( z( i ) / delta( i ) )& + **2 + else + c = w - delta( i )*dw - ( d( ip1 )-d( i ) )*( z( ip1 ) / delta( ip1 ) )& + **2 + end if + else + temp = z( ii ) / delta( ii ) + if( orgati ) then + dpsi = dpsi + temp*temp + else + dphi = dphi + temp*temp + end if + c = w - delta( i )*dpsi - delta( ip1 )*dphi + end if + a = ( delta( i )+delta( ip1 ) )*w -delta( i )*delta( ip1 )*dw + b = delta( i )*delta( ip1 )*w + if( c==zero ) then + if( a==zero ) then + if( .not.swtch ) then + if( orgati ) then + a = z( i )*z( i ) + delta( ip1 )*delta( ip1 )*( dpsi+dphi ) + + else + a = z( ip1 )*z( ip1 ) +delta( i )*delta( i )*( dpsi+dphi ) + end if + else + a = delta( i )*delta( i )*dpsi +delta( ip1 )*delta( ip1 )& + *dphi + end if + end if + eta = b / a + else if( a<=zero ) then + eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) + else + eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) + end if + else + ! interpolation using three most relevant poles + temp = rhoinv + psi + phi + if( swtch ) then + c = temp - delta( iim1 )*dpsi - delta( iip1 )*dphi + zz( 1 ) = delta( iim1 )*delta( iim1 )*dpsi + zz( 3 ) = delta( iip1 )*delta( iip1 )*dphi + else + if( orgati ) then + temp1 = z( iim1 ) / delta( iim1 ) + temp1 = temp1*temp1 + c = temp - delta( iip1 )*( dpsi+dphi ) -( d( iim1 )-d( iip1 ) )& + *temp1 + zz( 1 ) = z( iim1 )*z( iim1 ) + zz( 3 ) = delta( iip1 )*delta( iip1 )*( ( dpsi-temp1 )+dphi ) + else + temp1 = z( iip1 ) / delta( iip1 ) + temp1 = temp1*temp1 + c = temp - delta( iim1 )*( dpsi+dphi ) -( d( iip1 )-d( iim1 ) )& + *temp1 + zz( 1 ) = delta( iim1 )*delta( iim1 )*( dpsi+( dphi-temp1 ) ) + zz( 3 ) = z( iip1 )*z( iip1 ) + end if + end if + call stdlib_slaed6( niter, orgati, c, delta( iim1 ), zz, w, eta,info ) + if( info/=0 )go to 250 + end if + ! note, eta should be positive if w is negative, and + ! eta should be negative otherwise. however, + ! if for some reason caused by roundoff, eta*w > 0, + ! we simply use one newton step instead. this way + ! will guarantee eta*w < 0. + if( w*eta>=zero )eta = -w / dw + temp = tau + eta + if( temp>dltub .or. tempzero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch + end do loop_240 + ! return with info = 1, niter = maxit and not converged + info = 1 + if( orgati ) then + dlam = d( i ) + tau + else + dlam = d( ip1 ) + tau + end if + end if + 250 continue + return + end subroutine stdlib_slaed4 + + !> SLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_slaed8( icompq, k, n, qsiz, d, q, ldq, indxq, rho,cutpnt, z, dlamda, & + q2, ldq2, w, perm, givptr,givcol, givnum, indxp, indx, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, icompq, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(inout) :: d(*), q(ldq,*), z(*) + real(sp), intent(out) :: dlamda(*), givnum(2,*), q2(ldq2,*), w(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: mone = -1.0_sp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(sp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -3 + else if( icompq==1 .and. qsizn ) then + info = -10 + else if( ldq2n )go to 100 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_slapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + if( icompq==1 ) then + call stdlib_srot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j & + ) ) ), 1, c, s ) + end if + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( jlam ) SLAED9: finds the roots of the secular equation, as defined by the + !> values in D, Z, and RHO, between KSTART and KSTOP. It makes the + !> appropriate calls to SLAED4 and then stores the new matrix of + !> eigenvectors for use in calculating the next level of Z vectors. + + pure subroutine stdlib_slaed9( k, kstart, kstop, n, d, q, ldq, rho, dlamda, w,s, lds, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, kstart, kstop, ldq, lds, n + real(sp), intent(in) :: rho + ! Array Arguments + real(sp), intent(out) :: d(*), q(ldq,*), s(lds,*) + real(sp), intent(inout) :: dlamda(*), w(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( kstart<1 .or. kstart>max( 1, k ) ) then + info = -2 + else if( max( 1, kstop )max( 1, k ) )then + info = -3 + else if( n SLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg + !> matrix H. + + pure subroutine stdlib_slaein( rightv, noinit, n, h, ldh, wr, wi, vr, vi, b,ldb, work, eps3, & + smlnum, bignum, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(sp), intent(in) :: bignum, eps3, smlnum, wi, wr + ! Array Arguments + real(sp), intent(out) :: b(ldb,*), work(*) + real(sp), intent(in) :: h(ldh,*) + real(sp), intent(inout) :: vi(*), vr(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: tenth = 1.0e-1_sp + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, i1, i2, i3, ierr, its, j + real(sp) :: absbii, absbjj, ei, ej, growto, norm, nrmsml, rec, rootn, scale, temp, & + vcrit, vmax, vnorm, w, w1, x, xi, xr, y + ! Intrinsic Functions + intrinsic :: abs,max,real,sqrt + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=sp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - (wr,wi)*i (except that the subdiagonal elements and + ! the imaginary parts of the diagonal elements are not stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - wr + end do + if( wi==zero ) then + ! real eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_snrm2( n, vr, 1 ) + call stdlib_sscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), vr,1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( abs( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + temp = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + do i = 2, n + vr( i ) = temp + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_isamax( n, vr, 1 ) + call stdlib_sscal( n, one / abs( vr( i ) ), vr, 1 ) + else + ! complex eigenvalue. + if( noinit ) then + ! set initial vector. + do i = 1, n + vr( i ) = eps3 + vi( i ) = zero + end do + else + ! scale supplied initial vector. + norm = stdlib_slapy2( stdlib_snrm2( n, vr, 1 ), stdlib_snrm2( n, vi, 1 ) ) + + rec = ( eps3*rootn ) / max( norm, nrmsml ) + call stdlib_sscal( n, rec, vr, 1 ) + call stdlib_sscal( n, rec, vi, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing zero + ! pivots by eps3. + ! the imaginary part of the (i,j)-th element of u is stored in + ! b(j+1,i). + b( 2, 1 ) = -wi + do i = 2, n + b( i+1, 1 ) = zero + end do + loop_170: do i = 1, n - 1 + absbii = stdlib_slapy2( b( i, i ), b( i+1, i ) ) + ei = h( i+1, i ) + if( absbiivcrit ) then + rec = one / vmax + call stdlib_sscal( n, rec, vr, 1 ) + call stdlib_sscal( n, rec, vi, 1 ) + scale = scale*rec + vmax = one + vcrit = bignum + end if + xr = vr( i ) + xi = vi( i ) + if( rightv ) then + do j = i + 1, n + xr = xr - b( i, j )*vr( j ) + b( j+1, i )*vi( j ) + xi = xi - b( i, j )*vi( j ) - b( j+1, i )*vr( j ) + end do + else + do j = 1, i - 1 + xr = xr - b( j, i )*vr( j ) + b( i+1, j )*vi( j ) + xi = xi - b( j, i )*vi( j ) - b( i+1, j )*vr( j ) + end do + end if + w = abs( b( i, i ) ) + abs( b( i+1, i ) ) + if( w>smlnum ) then + if( ww*bignum ) then + rec = one / w1 + call stdlib_sscal( n, rec, vr, 1 ) + call stdlib_sscal( n, rec, vi, 1 ) + xr = vr( i ) + xi = vi( i ) + scale = scale*rec + vmax = vmax*rec + end if + end if + ! divide by diagonal element of b. + call stdlib_sladiv( xr, xi, b( i, i ), b( i+1, i ), vr( i ),vi( i ) ) + + vmax = max( abs( vr( i ) )+abs( vi( i ) ), vmax ) + vcrit = bignum / vmax + else + do j = 1, n + vr( j ) = zero + vi( j ) = zero + end do + vr( i ) = one + vi( i ) = one + scale = zero + vmax = one + vcrit = bignum + end if + end do loop_250 + ! test for sufficient growth in the norm of (vr,vi). + vnorm = stdlib_sasum( n, vr, 1 ) + stdlib_sasum( n, vi, 1 ) + if( vnorm>=growto*scale )go to 280 + ! choose a new orthogonal starting vector and try again. + y = eps3 / ( rootn+one ) + vr( 1 ) = eps3 + vi( 1 ) = zero + do i = 2, n + vr( i ) = y + vi( i ) = zero + end do + vr( n-its+1 ) = vr( n-its+1 ) - eps3*rootn + end do loop_270 + ! failure to find eigenvector in n iterations + info = 1 + 280 continue + ! normalize eigenvector. + vnorm = zero + do i = 1, n + vnorm = max( vnorm, abs( vr( i ) )+abs( vi( i ) ) ) + end do + call stdlib_sscal( n, one / vnorm, vr, 1 ) + call stdlib_sscal( n, one / vnorm, vi, 1 ) + end if + return + end subroutine stdlib_slaein + + !> SLAGV2: computes the Generalized Schur factorization of a real 2-by-2 + !> matrix pencil (A,B) where B is upper triangular. This routine + !> computes orthogonal (rotation) matrices given by CSL, SNL and CSR, + !> SNR such that + !> 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 + !> types), then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], + !> 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, + !> then + !> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] + !> [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] + !> [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] + !> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] + !> where b11 >= b22 > 0. + + pure subroutine stdlib_slagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb + real(sp), intent(out) :: csl, csr, snl, snr + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(2), alphar(2), beta(2) + ! ===================================================================== + + ! Local Scalars + real(sp) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, & + scale2, t, ulp, wi, wr1, wr2 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + safmin = stdlib_slamch( 'S' ) + ulp = stdlib_slamch( 'P' ) + ! scale a + anorm = max( abs( a( 1, 1 ) )+abs( a( 2, 1 ) ),abs( a( 1, 2 ) )+abs( a( 2, 2 ) ), & + safmin ) + ascale = one / anorm + a( 1, 1 ) = ascale*a( 1, 1 ) + a( 1, 2 ) = ascale*a( 1, 2 ) + a( 2, 1 ) = ascale*a( 2, 1 ) + a( 2, 2 ) = ascale*a( 2, 2 ) + ! scale b + bnorm = max( abs( b( 1, 1 ) ), abs( b( 1, 2 ) )+abs( b( 2, 2 ) ),safmin ) + bscale = one / bnorm + b( 1, 1 ) = bscale*b( 1, 1 ) + b( 1, 2 ) = bscale*b( 1, 2 ) + b( 2, 2 ) = bscale*b( 2, 2 ) + ! check if a can be deflated + if( abs( a( 2, 1 ) )<=ulp ) then + csl = one + snl = zero + csr = one + snr = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + ! check if b is singular + else if( abs( b( 1, 1 ) )<=ulp ) then + call stdlib_slartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + csr = one + snr = zero + call stdlib_srot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_srot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 1, 1 ) = zero + b( 2, 1 ) = zero + wi = zero + else if( abs( b( 2, 2 ) )<=ulp ) then + call stdlib_slartg( a( 2, 2 ), a( 2, 1 ), csr, snr, t ) + snr = -snr + call stdlib_srot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_srot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + csl = one + snl = zero + a( 2, 1 ) = zero + b( 2, 1 ) = zero + b( 2, 2 ) = zero + wi = zero + else + ! b is nonsingular, first compute the eigenvalues of (a,b) + call stdlib_slag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi ) + if( wi==zero ) then + ! two real eigenvalues, compute s*a-w*b + h1 = scale1*a( 1, 1 ) - wr1*b( 1, 1 ) + h2 = scale1*a( 1, 2 ) - wr1*b( 1, 2 ) + h3 = scale1*a( 2, 2 ) - wr1*b( 2, 2 ) + rr = stdlib_slapy2( h1, h2 ) + qq = stdlib_slapy2( scale1*a( 2, 1 ), h3 ) + if( rr>qq ) then + ! find right rotation matrix to zero 1,1 element of + ! (sa - wb) + call stdlib_slartg( h2, h1, csr, snr, t ) + else + ! find right rotation matrix to zero 2,1 element of + ! (sa - wb) + call stdlib_slartg( h3, scale1*a( 2, 1 ), csr, snr, t ) + end if + snr = -snr + call stdlib_srot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_srot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + ! compute inf norms of a and b + h1 = max( abs( a( 1, 1 ) )+abs( a( 1, 2 ) ),abs( a( 2, 1 ) )+abs( a( 2, 2 ) ) ) + + h2 = max( abs( b( 1, 1 ) )+abs( b( 1, 2 ) ),abs( b( 2, 1 ) )+abs( b( 2, 2 ) ) ) + + if( ( scale1*h1 )>=abs( wr1 )*h2 ) then + ! find left rotation matrix q to zero out b(2,1) + call stdlib_slartg( b( 1, 1 ), b( 2, 1 ), csl, snl, r ) + else + ! find left rotation matrix q to zero out a(2,1) + call stdlib_slartg( a( 1, 1 ), a( 2, 1 ), csl, snl, r ) + end if + call stdlib_srot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_srot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + a( 2, 1 ) = zero + b( 2, 1 ) = zero + else + ! a pair of complex conjugate eigenvalues + ! first compute the svd of the matrix b + call stdlib_slasv2( b( 1, 1 ), b( 1, 2 ), b( 2, 2 ), r, t, snr,csr, snl, csl ) + + ! form (a,b) := q(a,b)z**t where q is left rotation matrix and + ! z is right rotation matrix computed from stdlib_slasv2 + call stdlib_srot( 2, a( 1, 1 ), lda, a( 2, 1 ), lda, csl, snl ) + call stdlib_srot( 2, b( 1, 1 ), ldb, b( 2, 1 ), ldb, csl, snl ) + call stdlib_srot( 2, a( 1, 1 ), 1, a( 1, 2 ), 1, csr, snr ) + call stdlib_srot( 2, b( 1, 1 ), 1, b( 1, 2 ), 1, csr, snr ) + b( 2, 1 ) = zero + b( 1, 2 ) = zero + end if + end if + ! unscaling + a( 1, 1 ) = anorm*a( 1, 1 ) + a( 2, 1 ) = anorm*a( 2, 1 ) + a( 1, 2 ) = anorm*a( 1, 2 ) + a( 2, 2 ) = anorm*a( 2, 2 ) + b( 1, 1 ) = bnorm*b( 1, 1 ) + b( 2, 1 ) = bnorm*b( 2, 1 ) + b( 1, 2 ) = bnorm*b( 1, 2 ) + b( 2, 2 ) = bnorm*b( 2, 2 ) + if( wi==zero ) then + alphar( 1 ) = a( 1, 1 ) + alphar( 2 ) = a( 2, 2 ) + alphai( 1 ) = zero + alphai( 2 ) = zero + beta( 1 ) = b( 1, 1 ) + beta( 2 ) = b( 2, 2 ) + else + alphar( 1 ) = anorm*wr1 / scale1 / bnorm + alphai( 1 ) = anorm*wi / scale1 / bnorm + alphar( 2 ) = alphar( 1 ) + alphai( 2 ) = -alphai( 1 ) + beta( 1 ) = one + beta( 2 ) = one + end if + return + end subroutine stdlib_slagv2 + + !> SLAHR2: reduces the first NB columns of A real general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an orthogonal similarity transformation + !> Q**T * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by SGEHRD. + + pure subroutine stdlib_slahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(sp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**t + call stdlib_sgemv( 'NO TRANSPOSE', n-k, i-1, -one, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, one, a( k+1, i ), 1 ) + ! apply i - v * t**t * v**t to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**t * b1 + call stdlib_scopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_strmv( 'LOWER', 'TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, t( 1, nb ),& + 1 ) + ! w := w + v2**t * b2 + call stdlib_sgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ),lda, a( k+i, i ), & + 1, one, t( 1, nb ), 1 ) + ! w := t**t * w + call stdlib_strmv( 'UPPER', 'TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, nb ), 1 ) + + ! b2 := b2 - v2*w + call stdlib_sgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -one,a( k+i, 1 ),lda, t( 1, nb )& + , 1, one, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_strmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_saxpy( i-1, -one, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_slarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = one + ! compute y(k+1:n,i) + call stdlib_sgemv( 'NO TRANSPOSE', n-k, n-k-i+1,one, a( k+1, i+1 ),lda, a( k+i, i ),& + 1, zero, y( k+1, i ), 1 ) + call stdlib_sgemv( 'TRANSPOSE', n-k-i+1, i-1,one, a( k+i, 1 ), lda,a( k+i, i ), 1, & + zero, t( 1, i ), 1 ) + call stdlib_sgemv( 'NO TRANSPOSE', n-k, i-1, -one,y( k+1, 1 ), ldy,t( 1, i ), 1, & + one, y( k+1, i ), 1 ) + call stdlib_sscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_sscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_strmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_slacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,one, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, one,a( 1, & + 2+nb ), lda, a( k+1+nb, 1 ), lda, one, y,ldy ) + call stdlib_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,one, t, ldt, y, & + ldy ) + return + end subroutine stdlib_slahr2 + + !> SLALN2: solves a system of the form (ca A - w D ) X = s B + !> or (ca A**T - w D) X = s B with possible scaling ("s") and + !> perturbation of A. (A**T means A-transpose.) + !> A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + !> real diagonal matrix, w is a real or complex value, and X and B are + !> NA x 1 matrices -- real if w is real, complex if w is complex. NA + !> may be 1 or 2. + !> If w is complex, X and B are represented as NA x 2 matrices, + !> the first column of each being the real part and the second + !> being the imaginary part. + !> "s" is a scaling factor (<= 1), computed by SLALN2, which is + !> so chosen that X can be computed without overflow. X is further + !> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + !> than overflow. + !> If both singular values of (ca A - w D) are less than SMIN, + !> SMIN*identity will be used instead of (ca A - w D). If only one + !> singular value is less than SMIN, one element of (ca A - w D) will be + !> perturbed enough to make the smallest singular value roughly SMIN. + !> If both singular values are at least SMIN, (ca A - w D) will not be + !> perturbed. In any case, the perturbation will be at most some small + !> multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + !> are computed by infinity-norm approximations, and thus will only be + !> correct to a factor of 2 or so. + !> Note: all input quantities are assumed to be smaller than overflow + !> by a reasonable factor. (See BIGNUM.) + + pure subroutine stdlib_slaln2( ltrans, na, nw, smin, ca, a, lda, d1, d2, b,ldb, wr, wi, x, & + ldx, scale, xnorm, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: ltrans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, na, nw + real(sp), intent(in) :: ca, d1, d2, smin, wi, wr + real(sp), intent(out) :: scale, xnorm + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: icmax, j + real(sp) :: bbnd, bi1, bi2, bignum, bnorm, br1, br2, ci21, ci22, cmax, cnorm, cr21, & + cr22, csi, csr, li21, lr21, smini, smlnum, temp, u22abs, ui11, ui11r, ui12, ui12s, & + ui22, ur11, ur11r, ur12, ur12s, ur22, xi1, xi2, xr1, xr2 + ! Local Arrays + logical(lk) :: cswap(4), rswap(4) + integer(ilp) :: ipivot(4,4) + real(sp) :: ci(2,2), civ(4), cr(2,2), crv(4) + ! Intrinsic Functions + intrinsic :: abs,max + ! Equivalences + equivalence ( ci( 1, 1 ), civ( 1 ) ),( cr( 1, 1 ), crv( 1 ) ) + ! Data Statements + cswap = [.false.,.false.,.true.,.true.] + rswap = [.false.,.true.,.false.,.true.] + ipivot = reshape([1,2,3,4,2,1,4,3,3,4,1,2,4,3,2,1],[4,4]) + ! Executable Statements + ! compute bignum + smlnum = two*stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + smini = max( smin, smlnum ) + ! don't check for input errors + info = 0 + ! standard initializations + scale = one + if( na==1 ) then + ! 1 x 1 (i.e., scalar) system c x = b + if( nw==1 ) then + ! real 1x1 system. + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + cnorm = abs( csr ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + x( 1, 1 ) = ( b( 1, 1 )*scale ) / csr + xnorm = abs( x( 1, 1 ) ) + else + ! complex 1x1 system (w is complex) + ! c = ca a - w d + csr = ca*a( 1, 1 ) - wr*d1 + csi = -wi*d1 + cnorm = abs( csr ) + abs( csi ) + ! if | c | < smini, use c = smini + if( cnormone ) then + if( bnorm>bignum*cnorm )scale = one / bnorm + end if + ! compute x + call stdlib_sladiv( scale*b( 1, 1 ), scale*b( 1, 2 ), csr, csi,x( 1, 1 ), x( 1, & + 2 ) ) + xnorm = abs( x( 1, 1 ) ) + abs( x( 1, 2 ) ) + end if + else + ! 2x2 system + ! compute the realpart of c = ca a - w d (or ca a**t - w d,KIND=sp) + cr( 1, 1 ) = ca*a( 1, 1 ) - wr*d1 + cr( 2, 2 ) = ca*a( 2, 2 ) - wr*d2 + if( ltrans ) then + cr( 1, 2 ) = ca*a( 2, 1 ) + cr( 2, 1 ) = ca*a( 1, 2 ) + else + cr( 2, 1 ) = ca*a( 2, 1 ) + cr( 1, 2 ) = ca*a( 1, 2 ) + end if + if( nw==1 ) then + ! real2x2 system (w is real,KIND=sp) + ! find the largest element in c + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )>cmax ) then + cmax = abs( crv( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ur11r = one / ur11 + lr21 = ur11r*cr21 + ur22 = cr22 - ur12*lr21 + ! if smaller pivot < smini, use smini + if( abs( ur22 )one .and. abs( ur22 )=bignum*abs( ur22 ) )scale = one / bbnd + end if + xr2 = ( br2*scale ) / ur22 + xr1 = ( scale*br1 )*ur11r - xr2*( ur11r*ur12 ) + if( cswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + end if + xnorm = max( abs( xr1 ), abs( xr2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + else + ! complex 2x2 system (w is complex) + ! find the largest element in c + ci( 1, 1 ) = -wi*d1 + ci( 2, 1 ) = zero + ci( 1, 2 ) = zero + ci( 2, 2 ) = -wi*d2 + cmax = zero + icmax = 0 + do j = 1, 4 + if( abs( crv( j ) )+abs( civ( j ) )>cmax ) then + cmax = abs( crv( j ) ) + abs( civ( j ) ) + icmax = j + end if + end do + ! if norm(c) < smini, use smini*identity. + if( cmaxone ) then + if( bnorm>bignum*smini )scale = one / bnorm + end if + temp = scale / smini + x( 1, 1 ) = temp*b( 1, 1 ) + x( 2, 1 ) = temp*b( 2, 1 ) + x( 1, 2 ) = temp*b( 1, 2 ) + x( 2, 2 ) = temp*b( 2, 2 ) + xnorm = temp*bnorm + info = 1 + return + end if + ! gaussian elimination with complete pivoting. + ur11 = crv( icmax ) + ui11 = civ( icmax ) + cr21 = crv( ipivot( 2, icmax ) ) + ci21 = civ( ipivot( 2, icmax ) ) + ur12 = crv( ipivot( 3, icmax ) ) + ui12 = civ( ipivot( 3, icmax ) ) + cr22 = crv( ipivot( 4, icmax ) ) + ci22 = civ( ipivot( 4, icmax ) ) + if( icmax==1 .or. icmax==4 ) then + ! code when off-diagonals of pivoted c are real + if( abs( ur11 )>abs( ui11 ) ) then + temp = ui11 / ur11 + ur11r = one / ( ur11*( one+temp**2 ) ) + ui11r = -temp*ur11r + else + temp = ur11 / ui11 + ui11r = -one / ( ui11*( one+temp**2 ) ) + ur11r = -temp*ui11r + end if + lr21 = cr21*ur11r + li21 = cr21*ui11r + ur12s = ur12*ur11r + ui12s = ur12*ui11r + ur22 = cr22 - ur12*lr21 + ui22 = ci22 - ur12*li21 + else + ! code when diagonals of pivoted c are real + ur11r = one / ur11 + ui11r = zero + lr21 = cr21*ur11r + li21 = ci21*ur11r + ur12s = ur12*ur11r + ui12s = ui12*ur11r + ur22 = cr22 - ur12*lr21 + ui12*li21 + ui22 = -ur12*li21 - ui12*lr21 + end if + u22abs = abs( ur22 ) + abs( ui22 ) + ! if smaller pivot < smini, use smini + if( u22absone .and. u22abs=bignum*u22abs ) then + scale = one / bbnd + br1 = scale*br1 + bi1 = scale*bi1 + br2 = scale*br2 + bi2 = scale*bi2 + end if + end if + call stdlib_sladiv( br2, bi2, ur22, ui22, xr2, xi2 ) + xr1 = ur11r*br1 - ui11r*bi1 - ur12s*xr2 + ui12s*xi2 + xi1 = ui11r*br1 + ur11r*bi1 - ui12s*xr2 - ur12s*xi2 + if( cswap( icmax ) ) then + x( 1, 1 ) = xr2 + x( 2, 1 ) = xr1 + x( 1, 2 ) = xi2 + x( 2, 2 ) = xi1 + else + x( 1, 1 ) = xr1 + x( 2, 1 ) = xr2 + x( 1, 2 ) = xi1 + x( 2, 2 ) = xi2 + end if + xnorm = max( abs( xr1 )+abs( xi1 ), abs( xr2 )+abs( xi2 ) ) + ! further scaling if norm(a) norm(x) > overflow + if( xnorm>one .and. cmax>one ) then + if( xnorm>bignum / cmax ) then + temp = cmax / bignum + x( 1, 1 ) = temp*x( 1, 1 ) + x( 2, 1 ) = temp*x( 2, 1 ) + x( 1, 2 ) = temp*x( 1, 2 ) + x( 2, 2 ) = temp*x( 2, 2 ) + xnorm = temp*xnorm + scale = temp*scale + end if + end if + end if + end if + return + end subroutine stdlib_slaln2 + + !> SLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_slals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(sp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: bx(ldbx,*), work(*) + real(sp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, m, n, nlp1 + real(sp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb SLAMSWLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (SLASWLQ) + + pure subroutine stdlib_slamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(sp), intent(in) :: a(lda,*), t(ldt,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_sgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_stpmlqt('L','T',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_stpmlqt('L','T',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_sgemlqt('L','T',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_sgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_stpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr * k+1), ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_stpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_stpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_stpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1,ctr*k+1), ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_sgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_sgemlqt('R','T',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_stpmlqt('R','T',m , nb-k, k, 0,mb, a(1,i), lda,t(1, ctr*k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_stpmlqt('R','T',m , kk, k, 0,mb, a(1,ii), lda,t(1,ctr*k+1),ldt, c(1,1),& + ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_slamswlq + + !> SLAMTSQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (SLATSQR) + + pure subroutine stdlib_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + real(sp), intent(in) :: a(lda,*), t(ldt,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = mb * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_sgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_stpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1,ctr*k+1),ldt , c(1,& + 1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_stpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1), ldt,& + c(1,1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_sgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_sgemqrt('L','T',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_stpmqrt('L','T',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_stpmqrt('L','T',kk , n, k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_stpmqrt('R','T',m , kk, k, 0, nb, a(ii,1), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_stpmqrt('R','T',m , mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_sgemqrt('R','T',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_sgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_stpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_stpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_slamtsqr + + !> SLANV2: computes the Schur factorization of a real 2-by-2 nonsymmetric + !> matrix in standard form: + !> [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + !> [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + !> where either + !> 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + !> 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + !> conjugate eigenvalues. + + pure subroutine stdlib_slanv2( a, b, c, d, rt1r, rt1i, rt2r, rt2i, cs, sn ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(inout) :: a, b, c, d + real(sp), intent(out) :: cs, rt1i, rt1r, rt2i, rt2r, sn + ! ===================================================================== + ! Parameters + real(sp), parameter :: multpl = 4.0e+0_sp + + + ! Local Scalars + real(sp) :: aa, bb, bcmax, bcmis, cc, cs1, dd, eps, p, sab, sac, scale, sigma, sn1, & + tau, temp, z, safmin, safmn2, safmx2 + integer(ilp) :: count + ! Intrinsic Functions + intrinsic :: abs,max,min,sign,sqrt + ! Executable Statements + safmin = stdlib_slamch( 'S' ) + eps = stdlib_slamch( 'P' ) + safmn2 = stdlib_slamch( 'B' )**int( log( safmin / eps ) /log( stdlib_slamch( 'B' ) ) / & + two,KIND=ilp) + safmx2 = one / safmn2 + if( c==zero ) then + cs = one + sn = zero + else if( b==zero ) then + ! swap rows and columns + cs = zero + sn = one + temp = d + d = a + a = temp + b = -c + c = zero + else if( (a-d)==zero .and. sign( one, b )/=sign( one, c ) ) then + cs = one + sn = zero + else + temp = a - d + p = half*temp + bcmax = max( abs( b ), abs( c ) ) + bcmis = min( abs( b ), abs( c ) )*sign( one, b )*sign( one, c ) + scale = max( abs( p ), bcmax ) + z = ( p / scale )*p + ( bcmax / scale )*bcmis + ! if z is of the order of the machine accuracy, postpone the + ! decision on the nature of eigenvalues + if( z>=multpl*eps ) then + ! real eigenvalues. compute a and d. + z = p + sign( sqrt( scale )*sqrt( z ), p ) + a = d + z + d = d - ( bcmax / z )*bcmis + ! compute b and the rotation matrix + tau = stdlib_slapy2( c, z ) + cs = z / tau + sn = c / tau + b = b - c + c = zero + else + ! complex eigenvalues, or real(almost,KIND=sp) equal eigenvalues. + ! make diagonal elements equal. + count = 0 + sigma = b + c + 10 continue + count = count + 1 + scale = max( abs(temp), abs(sigma) ) + if( scale>=safmx2 ) then + sigma = sigma * safmn2 + temp = temp * safmn2 + if (count <= 20)goto 10 + end if + if( scale<=safmn2 ) then + sigma = sigma * safmx2 + temp = temp * safmx2 + if (count <= 20)goto 10 + end if + p = half*temp + tau = stdlib_slapy2( sigma, temp ) + cs = sqrt( half*( one+abs( sigma ) / tau ) ) + sn = -( p / ( tau*cs ) )*sign( one, sigma ) + ! compute [ aa bb ] = [ a b ] [ cs -sn ] + ! [ cc dd ] [ c d ] [ sn cs ] + aa = a*cs + b*sn + bb = -a*sn + b*cs + cc = c*cs + d*sn + dd = -c*sn + d*cs + ! compute [ a b ] = [ cs sn ] [ aa bb ] + ! [ c d ] [-sn cs ] [ cc dd ] + a = aa*cs + cc*sn + b = bb*cs + dd*sn + c = -aa*sn + cc*cs + d = -bb*sn + dd*cs + temp = half*( a+d ) + a = temp + d = temp + if( c/=zero ) then + if( b/=zero ) then + if( sign( one, b )==sign( one, c ) ) then + ! real eigenvalues: reduce to upper triangular form + sab = sqrt( abs( b ) ) + sac = sqrt( abs( c ) ) + p = sign( sab*sac, c ) + tau = one / sqrt( abs( b+c ) ) + a = temp + p + d = temp - p + b = b - c + c = zero + cs1 = sab*tau + sn1 = sac*tau + temp = cs*cs1 - sn*sn1 + sn = cs*sn1 + sn*cs1 + cs = temp + end if + else + b = -c + c = zero + temp = cs + cs = -sn + sn = temp + end if + end if + end if + end if + ! store eigenvalues in (rt1r,rt1i) and (rt2r,rt2i). + rt1r = a + rt2r = d + if( c==zero ) then + rt1i = zero + rt2i = zero + else + rt1i = sqrt( abs( b ) )*sqrt( abs( c ) ) + rt2i = -rt1i + end if + return + end subroutine stdlib_slanv2 + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_slapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(sp), intent(out) :: ssmin + ! Array Arguments + real(sp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + ! Local Scalars + real(sp) :: a11, a12, a22, c, ssmax, tau + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_slarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = one + c = -tau*stdlib_sdot( n, x, incx, y, incy ) + call stdlib_saxpy( n, c, x, incx, y, incy ) + call stdlib_slarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_slas2( a11, a12, a22, ssmin, ssmax ) + return + end subroutine stdlib_slapll + + !> SLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_slaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*), vn1(*), vn2(*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(sp) :: aii, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_slamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_isamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_sswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi SLAQPS: computes a step of QR factorization with column pivoting + !> of a real M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_slaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*) + real(sp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(sp) :: akk, temp, temp2, tol3z + ! Intrinsic Functions + intrinsic :: abs,max,min,nint,real,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_slamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + call stdlib_sgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1 ),lda, f( k, 1 ), & + ldf, one, a( rk, k ), 1 ) + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_sgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1 ),lda, a( rk, k & + ), 1, zero, auxv( 1 ), 1 ) + call stdlib_sgemv( 'NO TRANSPOSE', n, k-1, one, f( 1, 1 ), ldf,auxv( 1 ), 1, one,& + f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_snrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_snrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_dlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 40 + end if + return + end subroutine stdlib_slaqps + + !> SLAQR5:, called by SLAQR0, performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & + iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) + real(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(sp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& + tst1, tst2, ulp + integer(ilp) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & + krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,max,min,mod,real + ! Local Arrays + real(sp) :: vt(3) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== shuffle shifts into pairs of real shifts and pairs + ! . of complex conjugate shifts assuming complex + ! . conjugate shifts are already adjacent to one + ! . another. ==== + do i = 1, nshfts - 2, 2 + if( si( i )/=-si( i+1 ) ) then + swap = sr( i ) + sr( i ) = sr( i+1 ) + sr( i+1 ) = sr( i+2 ) + sr( i+2 ) = swap + swap = si( i ) + si( i ) = si( i+1 ) + si( i+1 ) = si( i+2 ) + si( i+2 ) = swap + end if + end do + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by one. the shuffle above + ! . ensures that the dropped shift is real and that + ! . the remaining shifts are paired. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = zero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_slaset( 'ALL', kdu, kdu, zero, one, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps-1 columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop ) then + if( h( k+1, k )/=zero ) then + tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) + if( tst1==zero ) then + if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),v( 1, m ) ) + alpha = v( 1, m ) + call stdlib_slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually zero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*v( 2, m ) + h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m ) + ! ==== calculate reflection to move + ! . mth bulge one step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & + then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new one. ==== + call stdlib_slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),si( 2*m-1 ), sr( & + 2*m ), si( 2*m ),vt ) + alpha = vt( 1 ) + call stdlib_slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*h( k+2, k ) ) + if( abs( h( k+2, k )-refsum*vt( 2 ) )+abs( refsum*vt( 3 ) )>ulp*( abs( & + h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old one with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = zero + h( k+3, k ) = zero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new one. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = zero + h( k+3, k ) = zero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m ) + h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*h( k+2, k+1 )+v( 3, m )*h( k+3, & + k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is zero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) + end if + if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) + h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & + then + h( k+1, k ) = zero + end if + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*h( k+2, j )+v( 3, m )*h( k+3, j & + ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m ) + u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m ) + z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & + jcol ), ldh, zero, wh,ldwh ) + call stdlib_slacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_sgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_slacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_sgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& + k1, k1 ),ldu, zero, wv, ldwv ) + call stdlib_slacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_slaqr5 + + !> SLAQTR: solves the real quasi-triangular system + !> op(T)*p = scale*c, if LREAL = .TRUE. + !> or the complex quasi-triangular systems + !> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. + !> in real arithmetic, where T is upper quasi-triangular. + !> If LREAL = .FALSE., then the first diagonal block of T must be + !> 1 by 1, B is the specially structured matrix + !> B = [ b(1) b(2) ... b(n) ] + !> [ w ] + !> [ w ] + !> [ . ] + !> [ w ] + !> op(A) = A or A**T, A**T denotes the transpose of + !> matrix A. + !> On input, X = [ c ]. On output, X = [ p ]. + !> [ d ] [ q ] + !> This subroutine is designed for the condition number estimation + !> in routine STRSNA. + + subroutine stdlib_slaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: lreal, ltran + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldt, n + real(sp), intent(out) :: scale + real(sp), intent(in) :: w + ! Array Arguments + real(sp), intent(in) :: b(*), t(ldt,*) + real(sp), intent(out) :: work(*) + real(sp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, j1, j2, jnext, k, n1, n2 + real(sp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & + xnorm, z + ! Local Arrays + real(sp) :: d(2,2), v(2,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! do not test the input parameters for errors + notran = .not.ltran + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + bignum = one / smlnum + xnorm = stdlib_slange( 'M', n, n, t, ldt, d ) + if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib_slange( 'M', n, 1, b, n, d ) ) + + smin = max( smlnum, eps*xnorm ) + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = stdlib_sasum( j-1, t( 1, j ), 1 ) + end do + if( .not.lreal ) then + do i = 2, n + work( i ) = work( i ) + abs( b( i ) ) + end do + end if + n2 = 2*n + n1 = n + if( .not.lreal )n1 = n2 + k = stdlib_isamax( n1, x, 1 ) + xmax = abs( x( k ) ) + scale = one + if( xmax>bignum ) then + scale = bignum / xmax + call stdlib_sscal( n1, scale, x, 1 ) + xmax = bignum + end if + if( lreal ) then + if( notran ) then + ! solve t*p = scale*c + jnext = n + loop_30: do j = n, 1, -1 + if( j>jnext )cycle loop_30 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! meet 1 by 1 diagonal block + ! scale to avoid overflow when computing + ! x(j) = b(j)/t(j,j) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xj = abs( x( j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_saxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + k = stdlib_isamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + else + ! meet 2 by 2 diagonal block + ! call 2 by 2 linear system solve, to take + ! care of possible overflow by scaling factor. + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + call stdlib_slaln2( .false., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_sscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) + ! to avoid overflow in updating right-hand side. + xj = max( abs( v( 1, 1 ) ), abs( v( 2, 1 ) ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update right-hand side + if( j1>1 ) then + call stdlib_saxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_saxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + k = stdlib_isamax( j1-1, x, 1 ) + xmax = abs( x( k ) ) + end if + end if + end do loop_30 + else + ! solve t**t*p = scale*c + jnext = 1 + loop_40: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1, x, 1 ) + xj = abs( x( j1 ) ) + tjj = abs( t( j1, j1 ) ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) / tmp + xmax = max( xmax, abs( x( j1 ) ) ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side elements by inner product. + xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) + if( xmax>one ) then + rec = one / xmax + if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then + call stdlib_sscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_sdot( j1-1, t( 1, j2 ), 1, x,1 ) + call stdlib_slaln2( .true., 2, 1, smin, one, t( j1, j1 ),ldt, one, one, d, & + 2, zero, zero, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_sscal( n, scaloc, x, 1 ) + scale = scale*scaloc + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) + end if + end do loop_40 + end if + else + sminw = max( eps*abs( w ), smin ) + if( notran ) then + ! solve (t + ib)*(p+iq) = c+id + jnext = n + loop_70: do j = n, 1, -1 + if( j>jnext )cycle loop_70 + j1 = j + j2 = j + jnext = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnext = j - 2 + end if + end if + if( j1==j2 ) then + ! 1 by 1 diagonal block + ! scale if necessary to avoid overflow in division + z = w + if( j1==1 )z = b( 1 ) + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_sladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) + x( j1 ) = sr + x( n+j1 ) = si + xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j1 of t. + if( xj>one ) then + rec = one / xj + if( work( j1 )>( bignum-xmax )*rec ) then + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + if( j1>1 ) then + call stdlib_saxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_saxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) + end do + end if + else + ! meet 2 by 2 diagonal block + d( 1, 1 ) = x( j1 ) + d( 2, 1 ) = x( j2 ) + d( 1, 2 ) = x( n+j1 ) + d( 2, 2 ) = x( n+j2 ) + call stdlib_slaln2( .false., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, & + d, 2, zero, -w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_sscal( 2*n, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + ! scale x(j1), .... to avoid overflow in + ! updating right hand side. + xj = max( abs( v( 1, 1 ) )+abs( v( 1, 2 ) ),abs( v( 2, 1 ) )+abs( v( 2, 2 )& + ) ) + if( xj>one ) then + rec = one / xj + if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + end if + end if + ! update the right-hand side. + if( j1>1 ) then + call stdlib_saxpy( j1-1, -x( j1 ), t( 1, j1 ), 1, x, 1 ) + call stdlib_saxpy( j1-1, -x( j2 ), t( 1, j2 ), 1, x, 1 ) + call stdlib_saxpy( j1-1, -x( n+j1 ), t( 1, j1 ), 1,x( n+1 ), 1 ) + call stdlib_saxpy( j1-1, -x( n+j2 ), t( 1, j2 ), 1,x( n+1 ), 1 ) + x( 1 ) = x( 1 ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) + x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) + xmax = zero + do k = 1, j1 - 1 + xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) + end do + end if + end if + end do loop_70 + else + ! solve (t + ib)**t*(p+iq) = c+id + jnext = 1 + loop_80: do j = 1, n + if( jone ) then + rec = one / xmax + if( work( j1 )>( bignum-xj )*rec ) then + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j1 ) = x( j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1, x, 1 ) + x( n+j1 ) = x( n+j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + if( j1>1 ) then + x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) + x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1 ) + end if + xj = abs( x( j1 ) ) + abs( x( j1+n ) ) + z = w + if( j1==1 )z = b( 1 ) + ! scale if necessary to avoid overflow in + ! complex division + tjj = abs( t( j1, j1 ) ) + abs( z ) + tmp = t( j1, j1 ) + if( tjjbignum*tjj ) then + rec = one / xj + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + call stdlib_sladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) + x( j1 ) = sr + x( j1+n ) = si + xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) + else + ! 2 by 2 diagonal block + ! scale if necessary to avoid overflow in forming the + ! right-hand side element by inner product. + xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) + + if( xmax>one ) then + rec = one / xmax + if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then + call stdlib_sscal( n2, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + d( 1, 1 ) = x( j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1, x,1 ) + d( 2, 1 ) = x( j2 ) - stdlib_sdot( j1-1, t( 1, j2 ), 1, x,1 ) + d( 1, 2 ) = x( n+j1 ) - stdlib_sdot( j1-1, t( 1, j1 ), 1,x( n+1 ), 1 ) + + d( 2, 2 ) = x( n+j2 ) - stdlib_sdot( j1-1, t( 1, j2 ), 1,x( n+1 ), 1 ) + + d( 1, 1 ) = d( 1, 1 ) - b( j1 )*x( n+1 ) + d( 2, 1 ) = d( 2, 1 ) - b( j2 )*x( n+1 ) + d( 1, 2 ) = d( 1, 2 ) + b( j1 )*x( 1 ) + d( 2, 2 ) = d( 2, 2 ) + b( j2 )*x( 1 ) + call stdlib_slaln2( .true., 2, 2, sminw, one, t( j1, j1 ),ldt, one, one, d,& + 2, zero, w, v, 2,scaloc, xnorm, ierr ) + if( ierr/=0 )info = 2 + if( scaloc/=one ) then + call stdlib_sscal( n2, scaloc, x, 1 ) + scale = scaloc*scale + end if + x( j1 ) = v( 1, 1 ) + x( j2 ) = v( 2, 1 ) + x( n+j1 ) = v( 1, 2 ) + x( n+j2 ) = v( 2, 2 ) + xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& + , xmax ) + end if + end do loop_80 + end if + end if + return + end subroutine stdlib_slaqtr + + !> SLASD3: finds all the square roots of the roots of the secular + !> equation, as defined by the values in D and Z. It makes the + !> appropriate calls to SLASD4 and then updates the singular + !> vectors by matrix multiplication. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> SLASD3 is called from SLASD1. + + pure subroutine stdlib_slasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& + vt2, ldvt2, idxc, ctot, z,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), idxc(*) + real(sp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) + real(sp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) + real(sp), intent(in) :: u2(ldu2,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 + real(sp) :: rho, temp + ! Intrinsic Functions + intrinsic :: abs,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + nlp1 = nl + 1 + nlp2 = nl + 2 + if( ( k<1 ) .or. ( k>n ) ) then + info = -4 + else if( ldqzero ) then + call stdlib_scopy( n, u2( 1, 1 ), 1, u( 1, 1 ), 1 ) + else + do i = 1, n + u( i, 1 ) = -u2( i, 1 ) + end do + end if + return + end if + ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can + ! be computed with high relative accuracy (barring over/underflow). + ! this is a problem on machines without a guard digit in + ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). + ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), + ! which on any of these machines zeros out the bottommost + ! bit of dsigma(i) if it is 1; this makes the subsequent + ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation + ! occurs. on binary machines with a guard digit (almost all + ! machines) it does not change dsigma(i) at all. on hexadecimal + ! and decimal machines with a guard digit, it slightly + ! changes the bottommost bits of dsigma(i). it does not account + ! for hexadecimal or decimal machines without guard digits + ! (we know of none). we use a subroutine call to compute + ! 2*dsigma(i) to prevent optimizing compilers from eliminating + ! this code. + do i = 1, k + dsigma( i ) = stdlib_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) + end do + ! keep a copy of z. + call stdlib_scopy( k, z, 1, q, 1 ) + ! normalize z. + rho = stdlib_snrm2( k, z, 1 ) + call stdlib_slascl( 'G', 0, 0, rho, one, k, 1, z, k, info ) + rho = rho*rho + ! find the new singular values. + do j = 1, k + call stdlib_slasd4( k, j, dsigma, z, u( 1, j ), rho, d( j ),vt( 1, j ), info ) + + ! if the zero finder fails, report the convergence failure. + if( info/=0 ) then + return + end if + end do + ! compute updated z. + do i = 1, k + z( i ) = u( i, k )*vt( i, k ) + do j = 1, i - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & + )+dsigma( j ) ) ) + end do + do j = i, k - 1 + z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & + i )+dsigma( j+1 ) ) ) + end do + z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1 ) ) + end do + ! compute left singular vectors of the modified diagonal matrix, + ! and store related information for the right singular vectors. + do i = 1, k + vt( 1, i ) = z( 1 ) / u( 1, i ) / vt( 1, i ) + u( 1, i ) = negone + do j = 2, k + vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) + u( j, i ) = dsigma( j )*vt( j, i ) + end do + temp = stdlib_snrm2( k, u( 1, i ), 1 ) + q( 1, i ) = u( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( j, i ) = u( jc, i ) / temp + end do + end do + ! update the left singular vector matrix. + if( k==2 ) then + call stdlib_sgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) + go to 100 + end if + if( ctot( 1 )>0 ) then + call stdlib_sgemm( 'N', 'N', nl, k, ctot( 1 ), one, u2( 1, 2 ), ldu2,q( 2, 1 ), ldq,& + zero, u( 1, 1 ), ldu ) + if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_sgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( & + ktemp, 1 ), ldq, one, u( 1, 1 ), ldu ) + end if + else if( ctot( 3 )>0 ) then + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + call stdlib_sgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),ldu2, q( ktemp, & + 1 ), ldq, zero, u( 1, 1 ), ldu ) + else + call stdlib_slacpy( 'F', nl, k, u2, ldu2, u, ldu ) + end if + call stdlib_scopy( k, q( 1, 1 ), ldq, u( nlp1, 1 ), ldu ) + ktemp = 2 + ctot( 1 ) + ctemp = ctot( 2 ) + ctot( 3 ) + call stdlib_sgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1 ), & + ldq, zero, u( nlp2, 1 ), ldu ) + ! generate the right singular vectors. + 100 continue + do i = 1, k + temp = stdlib_snrm2( k, vt( 1, i ), 1 ) + q( i, 1 ) = vt( 1, i ) / temp + do j = 2, k + jc = idxc( j ) + q( i, j ) = vt( jc, i ) / temp + end do + end do + ! update the right singular vector matrix. + if( k==2 ) then + call stdlib_sgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) + + return + end if + ktemp = 1 + ctot( 1 ) + call stdlib_sgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1, 1 ), ldq,vt2( 1, 1 ), ldvt2, & + zero, vt( 1, 1 ), ldvt ) + ktemp = 2 + ctot( 1 ) + ctot( 2 ) + if( ktemp<=ldvt2 )call stdlib_sgemm( 'N', 'N', k, nlp1, ctot( 3 ), one, q( 1, ktemp ),& + ldq, vt2( ktemp, 1 ), ldvt2, one, vt( 1, 1 ),ldvt ) + ktemp = ctot( 1 ) + 1 + nrp1 = nr + sqre + if( ktemp>1 ) then + do i = 1, k + q( i, ktemp ) = q( i, 1 ) + end do + do i = nlp2, m + vt2( ktemp, i ) = vt2( 1, i ) + end do + end if + ctemp = 1 + ctot( 2 ) + ctot( 3 ) + call stdlib_sgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1, ktemp ), ldq,vt2( ktemp, nlp2 )& + , ldvt2, zero, vt( 1, nlp2 ), ldvt ) + return + end subroutine stdlib_slasd3 + + !> SLASD6: computes the SVD of an updated upper bidiagonal matrix B + !> obtained by merging two smaller ones by appending a row. This + !> routine is used only for the problem which requires all singular + !> values and optionally singular vector matrices in factored form. + !> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + !> A related subroutine, SLASD1, handles the case in which all singular + !> values and singular vectors of the bidiagonal matrix are desired. + !> SLASD6 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The singular values of B can be computed using D1, D2, the first + !> components of all the right singular vectors of the lower block, and + !> the last components of all the right singular vectors of the upper + !> block. These components are stored and updated in VF and VL, + !> respectively, in SLASD6. Hence U and VT are not explicitly + !> referenced. + !> The singular values are stored in D. The algorithm consists of two + !> stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or if there is a zero + !> in the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLASD7. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the roots of the + !> secular equation via the routine SLASD4 (as called by SLASD8). + !> This routine also updates VF and VL and computes the distances + !> between the updated singular values and the old singular + !> values. + !> SLASD6 is called from SLASDA. + + pure subroutine stdlib_slasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & + givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: givptr, info, k + integer(ilp), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre + real(sp), intent(inout) :: alpha, beta + real(sp), intent(out) :: c, s + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) + integer(ilp), intent(inout) :: idxq(*) + real(sp), intent(inout) :: d(*), vf(*), vl(*) + real(sp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & + z(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 + real(sp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + m = n + sqre + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldgcolorgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! sort and deflate singular values. + call stdlib_slasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & + work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & + givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) + ! solve secular equation, compute difl, difr, and update vf, vl. + call stdlib_slasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & + iw ), info ) + ! report the possible convergence failure. + if( info/=0 ) then + return + end if + ! save the poles if icompq = 1. + if( icompq==1 ) then + call stdlib_scopy( k, d, 1, poles( 1, 1 ), 1 ) + call stdlib_scopy( k, work( isigma ), 1, poles( 1, 2 ), 1 ) + end if + ! unscale. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_slamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_slasd6 + + !> SOPGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> SSPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_sopgtr( uplo, n, ap, tau, q, ldq, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, n + ! Array Arguments + real(sp), intent(in) :: ap(*), tau(*) + real(sp), intent(out) :: q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, ij, j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldq1 ) then + ! generate q(2:n,2:n) + call stdlib_sorg2r( n-1, n-1, n-1, q( 2, 2 ), ldq, tau, work,iinfo ) + end if + end if + return + end subroutine stdlib_sopgtr + + !> SOPMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by SSPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_sopmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, m, n + ! Array Arguments + real(sp), intent(inout) :: ap(*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: forwrd, left, notran, upper + integer(ilp) :: i, i1, i2, i3, ic, ii, jc, mi, ni, nq + real(sp) :: aii + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldc SORBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines SORBDB2, SORBDB3, and SORBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_sorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + real(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < q .or. m-p < q ) then + info = -2 + else if( q < 0 .or. m-q < q ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-2 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORBDB1', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., q of x11 and x21 + do i = 1, q + call stdlib_slarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + call stdlib_slarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + theta(i) = atan2( x21(i,i), x11(i,i) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i) = one + x21(i,i) = one + call stdlib_slarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + call stdlib_slarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + if( i < q ) then + call stdlib_srot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c, s ) + call stdlib_slarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) + s = x21(i,i+1) + x21(i,i+1) = one + call stdlib_slarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & + ldx11, work(ilarf) ) + call stdlib_slarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & + ldx21, work(ilarf) ) + c = sqrt( stdlib_snrm2( p-i, x11(i+1,i+1), 1 )**2+ stdlib_snrm2( m-p-i, x21(i+1,& + i+1), 1 )**2 ) + phi(i) = atan2( s, c ) + call stdlib_sorbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1,x21(i+1,i+1), 1, x11(i+1,& + i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) + end if + end do + return + end subroutine stdlib_sorbdb1 + + !> SORBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines SORBDB1, SORBDB3, and SORBDB4 handle cases in + !> which P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_sorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + real(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < 0 .or. p > m-p ) then + info = -2 + else if( q < 0 .or. q < p .or. m-q < p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORBDB2', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., p of x11 and x21 + do i = 1, p + if( i > 1 ) then + call stdlib_srot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c, s ) + end if + call stdlib_slarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + c = x11(i,i) + x11(i,i) = one + call stdlib_slarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_slarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & + work(ilarf) ) + s = sqrt( stdlib_snrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_snrm2( m-p-i+1, x21(i,i), 1 & + )**2 ) + theta(i) = atan2( s, c ) + call stdlib_sorbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1, x21(i,i), 1,x11(i+1,i+1), & + ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_sscal( p-i, negone, x11(i+1,i), 1 ) + call stdlib_slarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + if( i < p ) then + call stdlib_slarfgp( p-i, x11(i+1,i), x11(i+2,i), 1, taup1(i) ) + phi(i) = atan2( x11(i+1,i), x21(i,i) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x11(i+1,i) = one + call stdlib_slarf( 'L', p-i, q-i, x11(i+1,i), 1, taup1(i),x11(i+1,i+1), ldx11, & + work(ilarf) ) + end if + x21(i,i) = one + call stdlib_slarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + end do + ! reduce the bottom-right portion of x21 to the identity matrix + do i = p + 1, q + call stdlib_slarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + x21(i,i) = one + call stdlib_slarf( 'L', m-p-i+1, q-i, x21(i,i), 1, taup2(i),x21(i,i+1), ldx21, work(& + ilarf) ) + end do + return + end subroutine stdlib_sorbdb2 + + !> SORBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines SORBDB1, SORBDB2, and SORBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_sorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + real(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + real(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( 2*p < m .or. p > m ) then + info = -2 + else if( q < m-p .or. m-q < m-p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORBDB3', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., m-p of x11 and x21 + do i = 1, m-p + if( i > 1 ) then + call stdlib_srot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c, s ) + end if + call stdlib_slarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + s = x21(i,i) + x21(i,i) = one + call stdlib_slarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_slarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + c = sqrt( stdlib_snrm2( p-i+1, x11(i,i), 1 )**2+ stdlib_snrm2( m-p-i, x21(i+1,i), 1 & + )**2 ) + theta(i) = atan2( s, c ) + call stdlib_sorbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1, x21(i+1,i), 1,x11(i,i+1), & + ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_slarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + if( i < m-p ) then + call stdlib_slarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1, taup2(i) ) + phi(i) = atan2( x21(i+1,i), x11(i,i) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x21(i+1,i) = one + call stdlib_slarf( 'L', m-p-i, q-i, x21(i+1,i), 1, taup2(i),x21(i+1,i+1), ldx21, & + work(ilarf) ) + end if + x11(i,i) = one + call stdlib_slarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + end do + ! reduce the bottom-right portion of x11 to the identity matrix + do i = m-p + 1, q + call stdlib_slarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + x11(i,i) = one + call stdlib_slarf( 'L', p-i+1, q-i, x11(i,i), 1, taup1(i), x11(i,i+1),ldx11, work(& + ilarf) ) + end do + return + end subroutine stdlib_sorbdb3 + + !> SORBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines SORBDB1, SORBDB2, and SORBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_sorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + phantom, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(sp), intent(out) :: phi(*), theta(*) + real(sp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) + real(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(sp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < m-q .or. m-p < m-q ) then + info = -2 + else if( q < m-q .or. q > m ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( q-1, p-1, m-p-1 ) + iorbdb5 = 2 + lorbdb5 = q + lworkopt = ilarf + llarf - 1 + lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORBDB4', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., m-q of x11 and x21 + do i = 1, m-q + if( i == 1 ) then + do j = 1, m + phantom(j) = zero + end do + call stdlib_sorbdb5( p, m-p, q, phantom(1), 1, phantom(p+1), 1,x11, ldx11, x21, & + ldx21, work(iorbdb5),lorbdb5, childinfo ) + call stdlib_sscal( p, negone, phantom(1), 1 ) + call stdlib_slarfgp( p, phantom(1), phantom(2), 1, taup1(1) ) + call stdlib_slarfgp( m-p, phantom(p+1), phantom(p+2), 1, taup2(1) ) + theta(i) = atan2( phantom(1), phantom(p+1) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + phantom(1) = one + phantom(p+1) = one + call stdlib_slarf( 'L', p, q, phantom(1), 1, taup1(1), x11, ldx11,work(ilarf) ) + + call stdlib_slarf( 'L', m-p, q, phantom(p+1), 1, taup2(1), x21,ldx21, work(ilarf)& + ) + else + call stdlib_sorbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1,x21(i,i-1), 1, x11(i,i)& + , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) + call stdlib_sscal( p-i+1, negone, x11(i,i-1), 1 ) + call stdlib_slarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1, taup1(i) ) + call stdlib_slarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1,taup2(i) ) + theta(i) = atan2( x11(i,i-1), x21(i,i-1) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i-1) = one + x21(i,i-1) = one + call stdlib_slarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1, taup1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_slarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1, taup2(i),x21(i,i), ldx21, & + work(ilarf) ) + end if + call stdlib_srot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) + call stdlib_slarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + c = x21(i,i) + x21(i,i) = one + call stdlib_slarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_slarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + if( i < m-q ) then + s = sqrt( stdlib_snrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_snrm2( m-p-i, x21(i+1,i),& + 1 )**2 ) + phi(i) = atan2( s, c ) + end if + end do + ! reduce the bottom-right portion of x11 to [ i 0 ] + do i = m - q + 1, p + call stdlib_slarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + x11(i,i) = one + call stdlib_slarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_slarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & + work(ilarf) ) + end do + ! reduce the bottom-right portion of x21 to [ 0 i ] + do i = p + 1, q + call stdlib_slarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) + + x21(m-q+i-p,i) = one + call stdlib_slarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& + , ldx21, work(ilarf) ) + end do + return + end subroutine stdlib_sorbdb4 + + !> SORCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The orthogonal matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + + subroutine stdlib_sorcsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q + ! Array Arguments + real(sp), intent(out) :: theta(*) + real(sp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) + real(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & + lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & + r + logical(lk) :: lquery, wantu1, wantu2, wantv1t + ! Local Arrays + real(sp) :: dum1(1), dum2(1,1) + ! Intrinsic Function + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + lquery = lwork == -1 + if( m < 0 ) then + info = -4 + else if( p < 0 .or. p > m ) then + info = -5 + else if( q < 0 .or. q > m ) then + info = -6 + else if( ldx11 < max( 1, p ) ) then + info = -8 + else if( ldx21 < max( 1, m-p ) ) then + info = -10 + else if( wantu1 .and. ldu1 < max( 1, p ) ) then + info = -13 + else if( wantu2 .and. ldu2 < max( 1, m - p ) ) then + info = -15 + else if( wantv1t .and. ldv1t < max( 1, q ) ) then + info = -17 + end if + r = min( p, m-p, q, m-q ) + ! compute workspace + ! work layout: + ! |-------------------------------------------------------| + ! | lworkopt (1) | + ! |-------------------------------------------------------| + ! | phi (max(1,r-1)) | + ! |-------------------------------------------------------| + ! | taup1 (max(1,p)) | b11d (r) | + ! | taup2 (max(1,m-p)) | b11e (r-1) | + ! | tauq1 (max(1,q)) | b12d (r) | + ! |-----------------------------------------| b12e (r-1) | + ! | stdlib_sorbdb work | stdlib_sorgqr work | stdlib_sorglq work | b21d (r) | + ! | | | | b21e (r-1) | + ! | | | | b22d (r) | + ! | | | | b22e (r-1) | + ! | | | | stdlib_sbbcsd work | + ! |-------------------------------------------------------| + if( info == 0 ) then + iphi = 2 + ib11d = iphi + max( 1, r-1 ) + ib11e = ib11d + max( 1, r ) + ib12d = ib11e + max( 1, r - 1 ) + ib12e = ib12d + max( 1, r ) + ib21d = ib12e + max( 1, r - 1 ) + ib21e = ib21d + max( 1, r ) + ib22d = ib21e + max( 1, r - 1 ) + ib22e = ib22d + max( 1, r ) + ibbcsd = ib22e + max( 1, r - 1 ) + itaup1 = iphi + max( 1, r-1 ) + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m-p ) + iorbdb = itauq1 + max( 1, q ) + iorgqr = itauq1 + max( 1, q ) + iorglq = itauq1 + max( 1, q ) + lorgqrmin = 1 + lorgqropt = 1 + lorglqmin = 1 + lorglqopt = 1 + if( r == q ) then + call stdlib_sorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1, work, -1,childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_sorgqr( p, p, q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + endif + if( wantu2 .and. m-p > 0 ) then + call stdlib_sorgqr( m-p, m-p, q, u2, ldu2, dum1, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_sorglq( q-1, q-1, q-1, v1t, ldv1t,dum1, work(1), -1, childinfo ) + + lorglqmin = max( lorglqmin, q-1 ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_sbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum1, u1, & + ldu1, u2, ldu2, v1t, ldv1t, dum2,1, dum1, dum1, dum1, dum1, dum1,dum1, dum1, & + dum1, work(1), -1, childinfo) + lbbcsd = int( work(1),KIND=ilp) + else if( r == p ) then + call stdlib_sorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1, work(1), -1,childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_sorgqr( p-1, p-1, p-1, u1(2,2), ldu1, dum1,work(1), -1, childinfo & + ) + lorgqrmin = max( lorgqrmin, p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_sorgqr( m-p, m-p, q, u2, ldu2, dum1, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_sorglq( q, q, r, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_sbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum1, v1t, & + ldv1t, dum2, 1, u1, ldu1, u2,ldu2, dum1, dum1, dum1, dum1, dum1,dum1, dum1, dum1,& + work(1), -1, childinfo) + lbbcsd = int( work(1),KIND=ilp) + else if( r == m-p ) then + call stdlib_sorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1, work(1), -1,childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_sorgqr( p, p, q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_sorgqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2, dum1,work(1), -1, & + childinfo ) + lorgqrmin = max( lorgqrmin, m-p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_sorglq( q, q, r, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_sbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum1, & + dum2, 1, v1t, ldv1t, u2, ldu2,u1, ldu1, dum1, dum1, dum1, dum1,dum1, dum1, dum1, & + dum1, work(1), -1,childinfo ) + lbbcsd = int( work(1),KIND=ilp) + else + call stdlib_sorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,dum1, dum1, dum1, & + dum1, dum1,work(1), -1, childinfo ) + lorbdb = m + int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_sorgqr( p, p, m-q, u1, ldu1, dum1, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_sorgqr( m-p, m-p, m-q, u2, ldu2, dum1, work(1),-1, childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_sorglq( q, q, q, v1t, ldv1t, dum1, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_sbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum1, u2, & + ldu2, u1, ldu1, dum2, 1,v1t, ldv1t, dum1, dum1, dum1, dum1,dum1, dum1, dum1, & + dum1, work(1), -1,childinfo ) + lbbcsd = int( work(1),KIND=ilp) + end if + lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1,ibbcsd+lbbcsd-& + 1 ) + lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1,ibbcsd+lbbcsd-& + 1 ) + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -19 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'SORCSD2BY1', -info ) + return + else if( lquery ) then + return + end if + lorgqr = lwork-iorgqr+1 + lorglq = lwork-iorglq+1 + ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, + ! in which r = min(p,m-p,q,m-q) + if( r == q ) then + ! case 1: r = q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_sorbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_slacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_sorgqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_slacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_sorgqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + v1t(1,1) = one + do j = 2, q + v1t(1,j) = zero + v1t(j,1) = zero + end do + call stdlib_slacpy( 'U', q-1, q-1, x21(1,2), ldx21, v1t(2,2),ldv1t ) + call stdlib_sorglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglq, childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_sbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,work(iphi), u1, & + ldu1, u2, ldu2, v1t, ldv1t,dum2, 1, work(ib11d), work(ib11e), work(ib12d),work(& + ib12e), work(ib21d), work(ib21e),work(ib22d), work(ib22e), work(ibbcsd), lbbcsd,& + childinfo ) + ! permute rows and columns to place zero submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_slapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == p ) then + ! case 2: r = p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_sorbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + u1(1,1) = one + do j = 2, p + u1(1,j) = zero + u1(j,1) = zero + end do + call stdlib_slacpy( 'L', p-1, p-1, x11(2,1), ldx11, u1(2,2), ldu1 ) + call stdlib_sorgqr( p-1, p-1, p-1, u1(2,2), ldu1, work(itaup1),work(iorgqr), & + lorgqr, childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_slacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_sorgqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_slacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) + call stdlib_sorglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_sbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,work(iphi), v1t, & + ldv1t, dum1, 1, u1, ldu1, u2,ldu2, work(ib11d), work(ib11e), work(ib12d),work(ib12e)& + , work(ib21d), work(ib21e),work(ib22d), work(ib22e), work(ibbcsd), lbbcsd,childinfo & + ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_slapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == m-p ) then + ! case 3: r = m-p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_sorbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_slacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_sorgqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + u2(1,1) = one + do j = 2, m-p + u2(1,j) = zero + u2(j,1) = zero + end do + call stdlib_slacpy( 'L', m-p-1, m-p-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_sorgqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,work(itaup2), work(iorgqr)& + , lorgqr, childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_slacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) + call stdlib_sorglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_sbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, work(iphi), & + dum1, 1, v1t, ldv1t, u2,ldu2, u1, ldu1, work(ib11d), work(ib11e),work(ib12d), work(& + ib12e), work(ib21d),work(ib21e), work(ib22d), work(ib22e),work(ibbcsd), lbbcsd, & + childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > r ) then + do i = 1, r + iwork(i) = q - r + i + end do + do i = r + 1, q + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_slapmt( .false., p, q, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_slapmr( .false., q, q, v1t, ldv1t, iwork ) + end if + end if + else + ! case 4: r = m-q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_sorbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,work(iphi), work(itaup1)& + , work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, childinfo ) + + ! accumulate householder reflectors + if( wantu2 .and. m-p > 0 ) then + call stdlib_scopy( m-p, work(iorbdb+p), 1, u2, 1 ) + end if + if( wantu1 .and. p > 0 ) then + call stdlib_scopy( p, work(iorbdb), 1, u1, 1 ) + do j = 2, p + u1(1,j) = zero + end do + call stdlib_slacpy( 'L', p-1, m-q-1, x11(2,1), ldx11, u1(2,2),ldu1 ) + call stdlib_sorgqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + do j = 2, m-p + u2(1,j) = zero + end do + call stdlib_slacpy( 'L', m-p-1, m-q-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_sorgqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_slacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) + call stdlib_slacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& + 1), ldv1t ) + call stdlib_slacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) + + call stdlib_sorglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_sbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, work(iphi), & + u2, ldu2, u1, ldu1, dum1, 1,v1t, ldv1t, work(ib11d), work(ib11e), work(ib12d),work(& + ib12e), work(ib21d), work(ib21e),work(ib22d), work(ib22e), work(ibbcsd), lbbcsd,& + childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( p > r ) then + do i = 1, r + iwork(i) = p - r + i + end do + do i = r + 1, p + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_slapmt( .false., p, p, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_slapmr( .false., p, q, v1t, ldv1t, iwork ) + end if + end if + end if + return + end subroutine stdlib_sorcsd2by1 + + !> SORGTR: generates a real orthogonal matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> SSYTRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_sorgtr( uplo, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, j, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + ! generate q(2:n,2:n) + call stdlib_sorgqr( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sorgtr + + !> SORGTSQR: generates an M-by-N real matrix Q_out with orthonormal columns, + !> which are the first N columns of a product of real orthogonal + !> matrices of order M which are returned by SLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for SLATSQR. + + pure subroutine stdlib_sorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + lquery = lwork==-1 + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. m SORMTR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by SSYTRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_sormtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery, upper + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, ni, nb, nq, nw + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )& + then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda SPBTRF: computes the Cholesky factorization of a real symmetric + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_spbtrf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 32 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ib, ii, j, jj, nb + ! Local Arrays + real(sp) :: work(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( .not.stdlib_lsame( uplo, 'U' ) ) .and.( .not.stdlib_lsame( uplo, 'L' ) ) ) & + then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldabkd ) then + ! use unblocked code + call stdlib_spbtf2( uplo, n, kd, ab, ldab, info ) + else + ! use blocked code + if( stdlib_lsame( uplo, 'U' ) ) then + ! compute the cholesky factorization of a symmetric band + ! matrix, given the upper triangle of the matrix in band + ! storage. + ! zero the upper triangle of the work array. + do j = 1, nb + do i = 1, j - 1 + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_70: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_spotf2( uplo, ib, ab( kd+1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 a12 a13 + ! a22 a23 + ! a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a12, a22 and + ! a23 are empty if ib = kd. the upper triangle of a13 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a12 + call stdlib_strsm( 'LEFT', 'UPPER', 'TRANSPOSE','NON-UNIT', ib, i2, one,& + ab( kd+1, i ),ldab-1, ab( kd+1-ib, i+ib ), ldab-1 ) + ! update a22 + call stdlib_ssyrk( 'UPPER', 'TRANSPOSE', i2, ib, -one,ab( kd+1-ib, i+ib & + ), ldab-1, one,ab( kd+1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the lower triangle of a13 into the work array. + do jj = 1, i3 + do ii = jj, ib + work( ii, jj ) = ab( ii-jj+1, jj+i+kd-1 ) + end do + end do + ! update a13 (in the work array). + call stdlib_strsm( 'LEFT', 'UPPER', 'TRANSPOSE','NON-UNIT', ib, i3, one,& + ab( kd+1, i ),ldab-1, work, ldwork ) + ! update a23 + if( i2>0 )call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', i2, i3,ib, -& + one, ab( kd+1-ib, i+ib ),ldab-1, work, ldwork, one,ab( 1+ib, i+kd ), & + ldab-1 ) + ! update a33 + call stdlib_ssyrk( 'UPPER', 'TRANSPOSE', i3, ib, -one,work, ldwork, one,& + ab( kd+1, i+kd ),ldab-1 ) + ! copy the lower triangle of a13 back into place. + do jj = 1, i3 + do ii = jj, ib + ab( ii-jj+1, jj+i+kd-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_70 + else + ! compute the cholesky factorization of a symmetric band + ! matrix, given the lower triangle of the matrix in band + ! storage. + ! zero the lower triangle of the work array. + do j = 1, nb + do i = j + 1, nb + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_140: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_spotf2( uplo, ib, ab( 1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 + ! a21 a22 + ! a31 a32 a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a21, a22 and + ! a32 are empty if ib = kd. the lower triangle of a31 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a21 + call stdlib_strsm( 'RIGHT', 'LOWER', 'TRANSPOSE','NON-UNIT', i2, ib, & + one, ab( 1, i ),ldab-1, ab( 1+ib, i ), ldab-1 ) + ! update a22 + call stdlib_ssyrk( 'LOWER', 'NO TRANSPOSE', i2, ib, -one,ab( 1+ib, i ), & + ldab-1, one,ab( 1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the upper triangle of a31 into the work array. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + work( ii, jj ) = ab( kd+1-jj+ii, jj+i-1 ) + end do + end do + ! update a31 (in the work array). + call stdlib_strsm( 'RIGHT', 'LOWER', 'TRANSPOSE','NON-UNIT', i3, ib, & + one, ab( 1, i ),ldab-1, work, ldwork ) + ! update a32 + if( i2>0 )call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', i3, i2,ib, -& + one, work, ldwork,ab( 1+ib, i ), ldab-1, one,ab( 1+kd-ib, i+ib ), ldab-& + 1 ) + ! update a33 + call stdlib_ssyrk( 'LOWER', 'NO TRANSPOSE', i3, ib, -one,work, ldwork, & + one, ab( 1, i+kd ),ldab-1 ) + ! copy the upper triangle of a31 back into place. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + ab( kd+1-jj+ii, jj+i-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_140 + end if + end if + return + 150 continue + return + end subroutine stdlib_spbtrf + + !> SPFTRI: computes the inverse of a real (symmetric) positive definite + !> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + !> computed by SPFTRF. + + pure subroutine stdlib_spftri( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SPFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_stftri( transr, uplo, 'N', n, a, info ) + if( info>0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution of triangular matrix multiply: inv(u)*inv(u)^c or + ! inv(l)^c*inv(l). there are eight cases. + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_slauum( 'L', n1, a( 0 ), n, info ) + call stdlib_ssyrk( 'L', 'T', n1, n2, one, a( n1 ), n, one,a( 0 ), n ) + call stdlib_strmm( 'L', 'U', 'N', 'N', n2, n1, one, a( n ), n,a( n1 ), n ) + + call stdlib_slauum( 'U', n2, a( n ), n, info ) + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_slauum( 'L', n1, a( n2 ), n, info ) + call stdlib_ssyrk( 'L', 'N', n1, n2, one, a( 0 ), n, one,a( n2 ), n ) + call stdlib_strmm( 'R', 'U', 'T', 'N', n1, n2, one, a( n1 ), n,a( 0 ), n ) + + call stdlib_slauum( 'U', n2, a( n1 ), n, info ) + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose, and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_slauum( 'U', n1, a( 0 ), n1, info ) + call stdlib_ssyrk( 'U', 'N', n1, n2, one, a( n1*n1 ), n1, one,a( 0 ), n1 ) + + call stdlib_strmm( 'R', 'L', 'N', 'N', n1, n2, one, a( 1 ), n1,a( n1*n1 ), n1 & + ) + call stdlib_slauum( 'L', n2, a( 1 ), n1, info ) + else + ! srpa for upper, transpose, and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_slauum( 'U', n1, a( n2*n2 ), n2, info ) + call stdlib_ssyrk( 'U', 'T', n1, n2, one, a( 0 ), n2, one,a( n2*n2 ), n2 ) + + call stdlib_strmm( 'L', 'L', 'T', 'N', n2, n1, one, a( n1*n2 ),n2, a( 0 ), n2 & + ) + call stdlib_slauum( 'L', n2, a( n1*n2 ), n2, info ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_slauum( 'L', k, a( 1 ), n+1, info ) + call stdlib_ssyrk( 'L', 'T', k, k, one, a( k+1 ), n+1, one,a( 1 ), n+1 ) + + call stdlib_strmm( 'L', 'U', 'N', 'N', k, k, one, a( 0 ), n+1,a( k+1 ), n+1 ) + + call stdlib_slauum( 'U', k, a( 0 ), n+1, info ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_slauum( 'L', k, a( k+1 ), n+1, info ) + call stdlib_ssyrk( 'L', 'N', k, k, one, a( 0 ), n+1, one,a( k+1 ), n+1 ) + + call stdlib_strmm( 'R', 'U', 'T', 'N', k, k, one, a( k ), n+1,a( 0 ), n+1 ) + + call stdlib_slauum( 'U', k, a( k ), n+1, info ) + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose, and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1), + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_slauum( 'U', k, a( k ), k, info ) + call stdlib_ssyrk( 'U', 'N', k, k, one, a( k*( k+1 ) ), k, one,a( k ), k ) + + call stdlib_strmm( 'R', 'L', 'N', 'N', k, k, one, a( 0 ), k,a( k*( k+1 ) ), k & + ) + call stdlib_slauum( 'L', k, a( 0 ), k, info ) + else + ! srpa for upper, transpose, and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0), + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_slauum( 'U', k, a( k*( k+1 ) ), k, info ) + call stdlib_ssyrk( 'U', 'T', k, k, one, a( 0 ), k, one,a( k*( k+1 ) ), k ) + + call stdlib_strmm( 'L', 'L', 'T', 'N', k, k, one, a( k*k ), k,a( 0 ), k ) + + call stdlib_slauum( 'L', k, a( k*k ), k, info ) + end if + end if + end if + return + end subroutine stdlib_spftri + + !> SPOTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_spotrf( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code. + call stdlib_spotrf2( uplo, n, a, lda, info ) + else + ! use blocked code. + if( upper ) then + ! compute the cholesky factorization a = u**t*u. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_ssyrk( 'UPPER', 'TRANSPOSE', jb, j-1, -one,a( 1, j ), lda, one, a(& + j, j ), lda ) + call stdlib_spotrf2( 'UPPER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block row. + call stdlib_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', jb, n-j-jb+1,j-1, -one, a( & + 1, j ), lda, a( 1, j+jb ),lda, one, a( j, j+jb ), lda ) + call stdlib_strsm( 'LEFT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',jb, n-j-jb+1, & + one, a( j, j ), lda,a( j, j+jb ), lda ) + end if + end do + else + ! compute the cholesky factorization a = l*l**t. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_ssyrk( 'LOWER', 'NO TRANSPOSE', jb, j-1, -one,a( j, 1 ), lda, one,& + a( j, j ), lda ) + call stdlib_spotrf2( 'LOWER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block column. + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', n-j-jb+1, jb,j-1, -one, a( & + j+jb, 1 ), lda, a( j, 1 ),lda, one, a( j+jb, j ), lda ) + call stdlib_strsm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',n-j-jb+1, jb, & + one, a( j, j ), lda,a( j+jb, j ), lda ) + end if + end do + end if + end if + go to 40 + 30 continue + info = info + j - 1 + 40 continue + return + end subroutine stdlib_spotrf + + !> SPTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + + pure subroutine stdlib_sptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(sp), intent(in) :: b(ldb,*), d(*), df(*), e(*), ef(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*) + real(sp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + integer(ilp) :: count, i, ix, j, nz + real(sp) :: bi, cx, dx, eps, ex, lstres, s, safe1, safe2, safmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldbsafe2 ) then + s = max( s, abs( work( n+i ) ) / work( i ) ) + else + s = max( s, ( abs( work( n+i ) )+safe1 ) /( work( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_spttrs( n, 1, df, ef, work( n+1 ), n, info ) + call stdlib_saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + do i = 1, n + if( work( i )>safe2 ) then + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + else + work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1 + end if + end do + ix = stdlib_isamax( n, work, 1 ) + ferr( j ) = work( ix ) + ! estimate the norm of inv(a). + ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by + ! m(i,j) = abs(a(i,j)), i = j, + ! m(i,j) = -abs(a(i,j)), i .ne. j, + ! and e = [ 1, 1, ..., 1 ]**t. note m(a) = m(l)*d*m(l)**t. + ! solve m(l) * x = e. + work( 1 ) = one + do i = 2, n + work( i ) = one + work( i-1 )*abs( ef( i-1 ) ) + end do + ! solve d * m(l)**t * x = b. + work( n ) = work( n ) / df( n ) + do i = n - 1, 1, -1 + work( i ) = work( i ) / df( i ) + work( i+1 )*abs( ef( i ) ) + end do + ! compute norm(inv(a)) = max(x(i)), 1<=i<=n. + ix = stdlib_isamax( n, work, 1 ) + ferr( j ) = ferr( j )*abs( work( ix ) ) + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_90 + return + end subroutine stdlib_sptrfs + + !> SPTSV: computes the solution to a real system of linear equations + !> A*X = B, where A is an N-by-N symmetric positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**T, and the factored form of A is then + !> used to solve the system of equations. + + pure subroutine stdlib_sptsv( n, nrhs, d, e, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: b(ldb,*), d(*), e(*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb SPTSVX: uses the factorization A = L*D*L**T to compute the solution + !> to a real system of linear equations A*X = B, where A is an N-by-N + !> symmetric positive definite tridiagonal matrix and X and B are + !> N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_sptsvx( fact, n, nrhs, d, e, df, ef, b, ldb, x, ldx,rcond, ferr, berr,& + work, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + real(sp), intent(in) :: b(ldb,*), d(*), e(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + real(sp), intent(inout) :: df(*), ef(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb1 )call stdlib_scopy( n-1, e, 1, ef, 1 ) + call stdlib_spttrf( n, df, ef, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_slanst( '1', n, d, e ) + ! compute the reciprocal of the condition number of a. + call stdlib_sptcon( n, df, ef, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_spttrs( n, nrhs, df, ef, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_sptrfs( n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr, berr,work, info ) + + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SSBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. + + subroutine stdlib_ssbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, wantz + integer(ilp) :: iinfo, imax, inde, indwrk, iscale + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_slascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_slascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_ssbtrd to reduce symmetric band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + call stdlib_ssbtrd( jobz, uplo, n, kd, ab, ldab, w, work( inde ), z, ldz,work( indwrk )& + , iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, call stdlib_ssteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + call stdlib_ssteqr( jobz, n, w, work( inde ), z, ldz, work( indwrk ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_ssbev + + !> SSBEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric band matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_ssbevx( jobz, range, uplo, n, kd, ab, ldab, q, ldq, vl,vu, il, iu, abstol, & + m, w, z, ldz, work, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, kd, ldab, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(out) :: q(ldq,*), w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indwrk, & + iscale, itmp1, j, jj, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=tmp1 ) )m = 0 + end if + if( m==1 ) then + w( 1 ) = tmp1 + if( wantz )z( 1, 1 ) = one + end if + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if ( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + endif + anrm = stdlib_slansb( 'M', uplo, n, kd, ab, ldab, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_slascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_slascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_ssbtrd to reduce symmetric band matrix to tridiagonal form. + indd = 1 + inde = indd + n + indwrk = inde + n + call stdlib_ssbtrd( jobz, uplo, n, kd, ab, ldab, work( indd ),work( inde ), q, ldq, & + work( indwrk ), iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_ssterf or stdlib_ssteqr. if this fails for some + ! eigenvalue, then try stdlib_sstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_scopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssterf( n, w, work( indee ), info ) + else + call stdlib_slacpy( 'A', n, n, q, ldq, z, ldz ) + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_sstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_sstein. + do j = 1, m + call stdlib_scopy( n, z( 1, j ), 1, work( 1 ), 1 ) + call stdlib_sgemv( 'N', n, n, one, q, ldq, work, 1, zero,z( 1, j ), 1 ) + end do + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) SSBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric + !> and banded, and B is also positive definite. + + pure subroutine stdlib_ssbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, n + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwrk + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab SSBGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric + !> and banded, and B is also positive definite. Eigenvalues and + !> eigenvectors can be selected by specifying either all eigenvalues, + !> a range of values or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_ssbgvx( jobz, range, uplo, n, ka, kb, ab, ldab, bb,ldbb, q, ldq, vl, & + vu, il, iu, abstol, m, w, z,ldz, work, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ka, kb, ldab, ldbb, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(sp), intent(out) :: q(ldq,*), w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, upper, valeig, wantz + character :: order, vect + integer(ilp) :: i, iinfo, indd, inde, indee, indibl, indisp, indiwo, indwrk, itmp1, j, & + jj, nsplit + real(sp) :: tmp1 + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ka<0 ) then + info = -5 + else if( kb<0 .or. kb>ka ) then + info = -6 + else if( ldab0 .and. vu<=vl )info = -14 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -15 + else if ( iun ) then + info = -16 + end if + end if + end if + if( info==0) then + if( ldz<1 .or. ( wantz .and. ldz SSPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A in packed storage. + + subroutine stdlib_sspev( jobz, uplo, n, ap, w, z, ldz, work, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_sscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_ssptrd to reduce symmetric packed matrix to tridiagonal form. + inde = 1 + indtau = inde + n + call stdlib_ssptrd( uplo, n, ap, w, work( inde ), work( indtau ), iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_sopgtr to generate the orthogonal matrix, then call stdlib_ssteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + indwrk = indtau + n + call stdlib_sopgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_ssteqr( jobz, n, w, work( inde ), z, ldz, work( indtau ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_sspev + + !> SSPEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A in packed storage. Eigenvalues/vectors + !> can be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_sspevx( jobz, range, uplo, n, ap, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, iwork, ifail,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indtau, & + indwrk, iscale, itmp1, j, jj, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=ap( 1 ) ) then + m = 1 + w( 1 ) = ap( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if ( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + endif + anrm = stdlib_slansp( 'M', uplo, n, ap, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_sscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_ssptrd to reduce symmetric packed matrix to tridiagonal form. + indtau = 1 + inde = indtau + n + indd = inde + n + indwrk = indd + n + call stdlib_ssptrd( uplo, n, ap, work( indd ), work( inde ),work( indtau ), iinfo ) + + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_ssterf or stdlib_sopgtr and stdlib_ssteqr. if this fails + ! for some eigenvalue, then try stdlib_sstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_scopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssterf( n, w, work( indee ), info ) + else + call stdlib_sopgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_sstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_sstein. + call stdlib_sopmtr( 'L', uplo, 'N', n, m, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) SSPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric, stored in packed format, + !> and B is also positive definite. + + subroutine stdlib_sspgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, n + ! Array Arguments + real(sp), intent(inout) :: ap(*), bp(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: trans + integer(ilp) :: j, neig + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, neig + call stdlib_stpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_stpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_sspgv + + !> SSPGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A + !> and B are assumed to be symmetric, stored in packed storage, and B + !> is also positive definite. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of indices + !> for the desired eigenvalues. + + subroutine stdlib_sspgvx( itype, jobz, range, uplo, n, ap, bp, vl, vu,il, iu, abstol, m, w, & + z, ldz, work, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: ap(*), bp(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: alleig, indeig, upper, valeig, wantz + character :: trans + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + upper = stdlib_lsame( uplo, 'U' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else + if( valeig ) then + if( n>0 .and. vu<=vl ) then + info = -9 + end if + else if( indeig ) then + if( il<1 ) then + info = -10 + else if( iun ) then + info = -11 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, m + call stdlib_stpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, m + call stdlib_stpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_sspgvx + + !> SSYEV: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. + + subroutine stdlib_ssyev( jobz, uplo, n, a, lda, w, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_slascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_ssytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_ssytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_sorgtr to generate the orthogonal matrix, then call stdlib_ssteqr. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + call stdlib_sorgtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + call stdlib_ssteqr( jobz, n, w, work( inde ), a, lda, work( indtau ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_ssyev + + !> SSYEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of indices + !> for the desired eigenvalues. + + subroutine stdlib_ssyevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwo, indtau, & + indwkn, indwrk, iscale, itmp1, j, jj, llwork, llwrkn, lwkmin, lwkopt, nb, & + nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_slansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_sscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_sscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_ssytrd to reduce symmetric matrix to tridiagonal form. + indtau = 1 + inde = indtau + n + indd = inde + n + indwrk = indd + n + llwork = lwork - indwrk + 1 + call stdlib_ssytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_ssterf or stdlib_sorgtr and stdlib_ssteqr. if this fails for + ! some eigenvalue, then try stdlib_sstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_scopy( n, work( indd ), 1, w, 1 ) + indee = indwrk + 2*n + if( .not.wantz ) then + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssterf( n, w, work( indee ), info ) + else + call stdlib_slacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_sorgtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssteqr( jobz, n, w, work( indee ), z, ldz,work( indwrk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwo = indisp + n + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwrk ),iwork( indiwo ), info & + ) + if( wantz ) then + call stdlib_sstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwrk ), iwork( indiwo ), ifail, info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_sstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_sormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) SSYGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be symmetric and B is also + !> positive definite. + + subroutine stdlib_ssygv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_strsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_strmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, one,b, ldb, a, lda ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_ssygv + + !> SSYGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A + !> and B are assumed to be symmetric and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_ssygvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkmin, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + upper = stdlib_lsame( uplo, 'U' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t*y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + call stdlib_strsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t*y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + call stdlib_strmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, one, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_ssygvx + + !> SSYSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_ssysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda SSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a real system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_ssysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(inout) :: af(ldaf,*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(sp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_slansy( 'I', uplo, n, a, lda, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_ssycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, iwork,info ) + ! compute the solution vectors x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_ssytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_ssyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SSYTRD_SY2SB: reduces a real symmetric matrix A to real symmetric + !> band-diagonal form AB by a orthogonal similarity transformation: + !> Q**T * A * Q = AB. + + pure subroutine stdlib_ssytrd_sy2sb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: rone = 1.0e+0_sp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'SSYTRD_SY2SB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda STGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of real matrices (S,P), where S is a quasi-triangular matrix + !> and P is upper triangular. Matrix pairs of this type are produced by + !> the generalized Schur factorization of a matrix pair (A,B): + !> A = Q*S*Z**T, B = Q*P*Z**T + !> as computed by SGGHRD + SHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal blocks of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the orthogonal factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_stgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(sp), intent(in) :: p(ldp,*), s(lds,*) + real(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: safety = 1.0e+2_sp + + ! Local Scalars + logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, & + lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, & + na, nw + real(sp) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, & + bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, & + salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale + ! Local Arrays + real(sp) :: bdiag(2), sum(2,2), sums(2,2), sump(2,2) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + ilall = .true. + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds1 )anorm = anorm + abs( s( 2, 1 ) ) + bnorm = abs( p( 1, 1 ) ) + work( 1 ) = zero + work( n+1 ) = zero + do j = 2, n + temp = zero + temp2 = zero + if( s( j, j-1 )==zero ) then + iend = j - 1 + else + iend = j - 2 + end if + do i = 1, iend + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + work( j ) = temp + work( n+j ) = temp2 + do i = iend + 1, min( j+1, n ) + temp = temp + abs( s( i, j ) ) + temp2 = temp2 + abs( p( i, j ) ) + end do + anorm = max( anorm, temp ) + bnorm = max( bnorm, temp2 ) + end do + ascale = one / max( anorm, safmin ) + bscale = one / max( bnorm, safmin ) + ! left eigenvectors + if( compl ) then + ieig = 0 + ! main loop over eigenvalues + ilcplx = .false. + loop_220: do je = 1, n + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at. + if( ilcplx ) then + ilcplx = .false. + cycle loop_220 + end if + nw = 1 + if( je=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + temp = acoef*s( je+1, je ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je+1 ) = -temp2r / temp + work( 3*n+je+1 ) = -temp2i / temp + else + work( 2*n+je+1 ) = one + work( 3*n+je+1 ) = zero + temp = acoef*s( je, je+1 ) + work( 2*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je+1, je+1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je+1 ) & + )+abs( work( 3*n+je+1 ) ) ) + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! t + ! triangular solve of (a a - b b) y = 0 + ! t + ! (rowwise in (a a - b b) , or columnwise in (a a - b b) ) + il2by2 = .false. + loop_160: do j = je + nw, n + if( il2by2 ) then + il2by2 = .false. + cycle loop_160 + end if + na = 1 + bdiag( 1 ) = p( j, j ) + if( jbignum*xscale ) then + do jw = 0, nw - 1 + do jr = je, j - 1 + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute dot products + ! j-1 + ! sum = sum conjg( a*s(k,j) - b*p(k,j) )*x(k) + ! k=je + ! to reduce the op count, this is done as + ! _ j-1 _ j-1 + ! a*conjg( sum s(k,j)*x(k) ) - b*conjg( sum p(k,j)*x(k) ) + ! k=je k=je + ! which may cause underflow problems if a or b are close + ! to underflow. (e.g., less than small.) + do jw = 1, nw + do ja = 1, na + sums( ja, jw ) = zero + sump( ja, jw ) = zero + do jr = je, j - 1 + sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr & + ) + end do + end do + end do + do ja = 1, na + if( ilcplx ) then + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) -bcoefi*sump( & + ja, 2 ) + sum( ja, 2 ) = -acoef*sums( ja, 2 ) +bcoefr*sump( ja, 2 ) +bcoefi*sump( & + ja, 1 ) + else + sum( ja, 1 ) = -acoef*sums( ja, 1 ) +bcoefr*sump( ja, 1 ) + end if + end do + ! t + ! solve ( a a - b b ) y = sum(,) + ! with scaling and perturbation of the denominator + call stdlib_slaln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1 ), & + bdiag( 2 ), sum, 2, bcoefr,bcoefi, work( 2*n+j ), n, scale, temp,iinfo ) + + if( scalesafmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = ibeg, n + vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw ) + end do + end do + end if + ieig = ieig + nw - 1 + end do loop_220 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + ilcplx = .false. + loop_500: do je = n, 1, -1 + ! skip this iteration if (a) howmny='s' and select=.false., or + ! (b) this would be the second of a complex pair. + ! check for complex eigenvalue, so as to be sure of which + ! entry(-ies) of select to look at -- if complex, select(je) + ! or select(je-1). + ! if this is a complex pair, the 2-by-2 diagonal block + ! corresponding to the eigenvalue is in rows/columns je-1:je + if( ilcplx ) then + ilcplx = .false. + cycle loop_500 + end if + nw = 1 + if( je>1 ) then + if( s( je, je-1 )/=zero ) then + ilcplx = .true. + nw = 2 + end if + end if + if( ilall ) then + ilcomp = .true. + else if( ilcplx ) then + ilcomp = select( je ) .or. select( je-1 ) + else + ilcomp = select( je ) + end if + if( .not.ilcomp )cycle loop_500 + ! decide if (a) singular pencil, (b) real eigenvalue, or + ! (c) complex eigenvalue. + if( .not.ilcplx ) then + if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then + ! singular matrix pencil -- unit eigenvector + ieig = ieig - 1 + do jr = 1, n + vr( jr, ieig ) = zero + end do + vr( ieig, ieig ) = one + cycle loop_500 + end if + end if + ! clear vector + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+2 )*n+jr ) = zero + end do + end do + ! compute coefficients in ( a a - b b ) x = 0 + ! a is acoef + ! b is bcoefr + i*bcoefi + if( .not.ilcplx ) then + ! real eigenvalue + temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin & + ) + salfar = ( temp*s( je, je ) )*ascale + sbeta = ( temp*p( je, je ) )*bscale + acoef = sbeta*ascale + bcoefr = salfar*bscale + bcoefi = zero + ! scale to avoid underflow + scale = one + lsa = abs( sbeta )>=safmin .and. abs( acoef )=safmin .and. abs( bcoefr )=safmin )scale = ( safmin / ulp ) / & + acoefa + if( bcoefa*ulp=safmin )scale = max( scale, ( safmin / & + ulp ) / bcoefa ) + if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa ) + if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) ) + + if( scale/=one ) then + acoef = scale*acoef + acoefa = abs( acoef ) + bcoefr = scale*bcoefr + bcoefi = scale*bcoefi + bcoefa = abs( bcoefr ) + abs( bcoefi ) + end if + ! compute first two components of eigenvector + ! and contribution to sums + temp = acoef*s( je, je-1 ) + temp2r = acoef*s( je, je ) - bcoefr*p( je, je ) + temp2i = -bcoefi*p( je, je ) + if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then + work( 2*n+je ) = one + work( 3*n+je ) = zero + work( 2*n+je-1 ) = -temp2r / temp + work( 3*n+je-1 ) = -temp2i / temp + else + work( 2*n+je-1 ) = one + work( 3*n+je-1 ) = zero + temp = acoef*s( je-1, je ) + work( 2*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / & + temp + work( 3*n+je ) = bcoefi*p( je-1, je-1 ) / temp + end if + xmax = max( abs( work( 2*n+je ) )+abs( work( 3*n+je ) ),abs( work( 2*n+je-1 ) & + )+abs( work( 3*n+je-1 ) ) ) + ! compute contribution from columns je and je-1 + ! of a and b to the sums. + creala = acoef*work( 2*n+je-1 ) + cimaga = acoef*work( 3*n+je-1 ) + crealb = bcoefr*work( 2*n+je-1 ) -bcoefi*work( 3*n+je-1 ) + cimagb = bcoefi*work( 2*n+je-1 ) +bcoefr*work( 3*n+je-1 ) + cre2a = acoef*work( 2*n+je ) + cim2a = acoef*work( 3*n+je ) + cre2b = bcoefr*work( 2*n+je ) - bcoefi*work( 3*n+je ) + cim2b = bcoefi*work( 2*n+je ) + bcoefr*work( 3*n+je ) + do jr = 1, je - 2 + work( 2*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, & + je ) + cre2b*p( jr, je ) + work( 3*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, & + je ) + cim2b*p( jr, je ) + end do + end if + dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin ) + ! columnwise triangular solve of (a a - b b) x = 0 + il2by2 = .false. + loop_370: do j = je - nw, 1, -1 + ! if a 2-by-2 block, is in position j-1:j, wait until + ! next iteration to process it (when it will be j:j+1) + if( .not.il2by2 .and. j>1 ) then + if( s( j, j-1 )/=zero ) then + il2by2 = .true. + cycle loop_370 + end if + end if + bdiag( 1 ) = p( j, j ) + if( il2by2 ) then + na = 2 + bdiag( 2 ) = p( j+1, j+1 ) + else + na = 1 + end if + ! compute x(j) (and x(j+1), if 2-by-2 block) + call stdlib_slaln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1 ), & + bdiag( 2 ), work( 2*n+j ),n, bcoefr, bcoefi, sum, 2, scale, temp,iinfo ) + + if( scale1 ) then + ! check whether scaling is necessary for sum. + xscale = one / max( one, xmax ) + temp = acoefa*work( j ) + bcoefa*work( n+j ) + if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) ) + + temp = max( temp, acoefa, bcoefa ) + if( temp>bignum*xscale ) then + do jw = 0, nw - 1 + do jr = 1, je + work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr ) + end do + end do + xmax = xmax*xscale + end if + ! compute the contributions of the off-diagonals of + ! column j (and j+1, if 2-by-2 block) of a and b to the + ! sums. + do ja = 1, na + if( ilcplx ) then + creala = acoef*work( 2*n+j+ja-1 ) + cimaga = acoef*work( 3*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) -bcoefi*work( 3*n+j+ja-1 ) + cimagb = bcoefi*work( 2*n+j+ja-1 ) +bcoefr*work( 3*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + work( 3*n+jr ) = work( 3*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(& + jr, j+ja-1 ) + end do + else + creala = acoef*work( 2*n+j+ja-1 ) + crealb = bcoefr*work( 2*n+j+ja-1 ) + do jr = 1, j - 1 + work( 2*n+jr ) = work( 2*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(& + jr, j+ja-1 ) + end do + end if + end do + end if + il2by2 = .false. + end do loop_370 + ! copy eigenvector to vr, back transforming if + ! howmny='b'. + ieig = ieig - nw + if( ilback ) then + do jw = 0, nw - 1 + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1 ) + end do + ! a series of compiler directives to defeat + ! vectorization for the next loop + do jc = 2, je + do jr = 1, n + work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )& + *vr( jr, jc ) + end do + end do + end do + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr ) + end do + end do + iend = n + else + do jw = 0, nw - 1 + do jr = 1, n + vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr ) + end do + end do + iend = je + end if + ! scale eigenvector + xmax = zero + if( ilcplx ) then + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) ) + end do + else + do j = 1, iend + xmax = max( xmax, abs( vr( j, ieig ) ) ) + end do + end if + if( xmax>safmin ) then + xscale = one / xmax + do jw = 0, nw - 1 + do jr = 1, iend + vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw ) + end do + end do + end if + end do loop_500 + end if + return + end subroutine stdlib_stgevc + + !> STGEX2: swaps adjacent diagonal blocks (A11, B11) and (A22, B22) + !> of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair + !> (A, B) by an orthogonal equivalence transformation. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, & + work, lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2 + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_scopy by calls to stdlib_slaset, or by do + ! loops. sven hammarling, 1/5/02. + ! Parameters + real(sp), parameter :: twenty = 2.0e+01_sp + integer(ilp), parameter :: ldst = 4 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, idum, linfo, m + real(sp) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, & + scale, smlnum, thresha, threshb + ! Local Arrays + integer(ilp) :: iwork(ldst) + real(sp) :: ai(2), ar(2), be(2), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(& + ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(& + ldst,ldst) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 .or. n1<=0 .or. n2<=0 )return + if( n1>n .or. ( j1+n1 )>n )return + m = n1 + n2 + if( lwork=sb ) then + call stdlib_slartg( s( 1, 1 ), s( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + else + call stdlib_slartg( t( 1, 1 ), t( 2, 1 ), li( 1, 1 ), li( 2, 1 ),ddum ) + end if + call stdlib_srot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + call stdlib_srot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, li( 1, 1 ),li( 2, 1 ) ) + + li( 2, 2 ) = li( 1, 1 ) + li( 1, 2 ) = -li( 2, 1 ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) ) <= thresha .and.abs( t( 2, 1 ) ) <= threshb + if( .not.weak )go to 70 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_slacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_sgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_sgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_slassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_slacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_sgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_sgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_slassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + call stdlib_srot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_srot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 ) ) + + call stdlib_srot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1, 1 ), li( 2, 1 & + ) ) + call stdlib_srot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1, 1 ), li( 2, 1 & + ) ) + ! set n1-by-n2 (2,1) - blocks to zero. + a( j1+1, j1 ) = zero + b( j1+1, j1 ) = zero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_srot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, ir( 1, 1 ),ir( 2, 1 & + ) ) + if( wantq )call stdlib_srot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, li( 1, 1 ),li( 2, 1 & + ) ) + ! exit with info = 0 if swap was successfully performed. + return + else + ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2 + ! and 2-by-2 blocks. + ! solve the generalized sylvester equation + ! s11 * r - l * s22 = scale * s12 + ! t11 * r - l * t22 = scale * t12 + ! for r and l. solutions in li and ir. + call stdlib_slacpy( 'FULL', n1, n2, t( 1, n1+1 ), ldst, li, ldst ) + call stdlib_slacpy( 'FULL', n1, n2, s( 1, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst ) + + call stdlib_stgsy2( 'N', 0, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),& + ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,& + linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix ql: + ! ql**t * li = [ tl ] + ! [ 0 ] + ! where + ! li = [ -l ] + ! [ scale * identity(n2) ] + do i = 1, n2 + call stdlib_sscal( n1, -one, li( 1, i ), 1 ) + li( n1+i, i ) = scale + end do + call stdlib_sgeqr2( m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_sorg2r( m, m, n2, li, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + ! compute orthogonal matrix rq: + ! ir * rq**t = [ 0 tr], + ! where ir = [ scale * identity(n1), r ] + do i = 1, n1 + ir( n2+i, i ) = scale + end do + call stdlib_sgerq2( n1, m, ir( n2+1, 1 ), ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_sorgr2( m, m, n1, ir, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + ! perform the swapping tentatively: + call stdlib_sgemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + call stdlib_sgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst ) + call stdlib_sgemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + call stdlib_sgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst ) + call stdlib_slacpy( 'F', m, m, s, ldst, scpy, ldst ) + call stdlib_slacpy( 'F', m, m, t, ldst, tcpy, ldst ) + call stdlib_slacpy( 'F', m, m, ir, ldst, ircop, ldst ) + call stdlib_slacpy( 'F', m, m, li, ldst, licop, ldst ) + ! triangularize the b-part by an rq factorization. + ! apply transformation (from left) to a-part, giving s. + call stdlib_sgerq2( m, m, t, ldst, taur, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_sormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo ) + if( linfo/=0 )go to 70 + call stdlib_sormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo ) + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in brqa21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_slassq( n1, s( n2+1, i ), 1, dscale, dsum ) + end do + brqa21 = dscale*sqrt( dsum ) + ! triangularize the b-part by a qr factorization. + ! apply transformation (from right) to a-part, giving s. + call stdlib_sgeqr2( m, m, tcpy, ldst, taul, work, linfo ) + if( linfo/=0 )go to 70 + call stdlib_sorm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info ) + + call stdlib_sorm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info ) + + if( linfo/=0 )go to 70 + ! compute f-norm(s21) in bqra21. (t21 is 0.) + dscale = zero + dsum = one + do i = 1, n2 + call stdlib_slassq( n1, scpy( n2+1, i ), 1, dscale, dsum ) + end do + bqra21 = dscale*sqrt( dsum ) + ! decide which method to use. + ! weak stability test: + ! f-norm(s21) <= o(eps * f-norm((s))) + if( bqra21<=brqa21 .and. bqra21<=thresha ) then + call stdlib_slacpy( 'F', m, m, scpy, ldst, s, ldst ) + call stdlib_slacpy( 'F', m, m, tcpy, ldst, t, ldst ) + call stdlib_slacpy( 'F', m, m, ircop, ldst, ir, ldst ) + call stdlib_slacpy( 'F', m, m, licop, ldst, li, ldst ) + else if( brqa21>=thresha ) then + go to 70 + end if + ! set lower triangle of b-part to zero + call stdlib_slaset( 'LOWER', m-1, m-1, zero, zero, t(2,1), ldst ) + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_slacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m ) + call stdlib_sgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m ) + + call stdlib_sgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_slassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sa = dscale*sqrt( dsum ) + call stdlib_slacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m ) + call stdlib_sgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m ) + + call stdlib_sgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),& + m ) + dscale = zero + dsum = one + call stdlib_slassq( m*m, work( m*m+1 ), 1, dscale, dsum ) + sb = dscale*sqrt( dsum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 70 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! transformations and set n1-by-n2 (2,1)-block to zero. + call stdlib_slaset( 'FULL', n1, n2, zero, zero, s(n2+1,1), ldst ) + ! copy back m-by-m diagonal block starting at index j1 of (a, b) + call stdlib_slacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda ) + call stdlib_slacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb ) + call stdlib_slaset( 'FULL', ldst, ldst, zero, zero, t, ldst ) + ! standardize existing 2-by-2 blocks. + call stdlib_slaset( 'FULL', m, m, zero, zero, work, m ) + work( 1 ) = one + t( 1, 1 ) = one + idum = lwork - m*m - 2 + if( n2>1 ) then + call stdlib_slagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1 ), & + work( 2 ), t( 1, 1 ), t( 2, 1 ) ) + work( m+1 ) = -work( 2 ) + work( m+2 ) = work( 1 ) + t( n2, n2 ) = t( 1, 1 ) + t( 1, 2 ) = -t( 2, 1 ) + end if + work( m*m ) = one + t( m, m ) = one + if( n1>1 ) then + call stdlib_slagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, & + work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) ) + + work( m*m ) = work( n2*m+n2+1 ) + work( m*m-1 ) = -work( n2*m+n2+2 ) + t( m, m ) = t( n2+1, n2+1 ) + t( m-1, m ) = -t( m, m-1 ) + end if + call stdlib_sgemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, & + work( m*m+1 ), n2 ) + call stdlib_slacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda ) + call stdlib_sgemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, & + work( m*m+1 ), n2 ) + call stdlib_slacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb ) + call stdlib_sgemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m & + ) + call stdlib_slacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst ) + call stdlib_sgemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_slacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda ) + call stdlib_sgemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), & + ldst, zero, work, n2 ) + call stdlib_slacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb ) + call stdlib_sgemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m ) + call stdlib_slacpy( 'FULL', m, m, work, m, ir, ldst ) + ! accumulate transformations into q and z if requested. + if( wantq ) then + call stdlib_sgemm( 'N', 'N', n, m, m, one, q( 1, j1 ), ldq, li,ldst, zero, work, & + n ) + call stdlib_slacpy( 'FULL', n, m, work, n, q( 1, j1 ), ldq ) + end if + if( wantz ) then + call stdlib_sgemm( 'N', 'N', n, m, m, one, z( 1, j1 ), ldz, ir,ldst, zero, work, & + n ) + call stdlib_slacpy( 'FULL', n, m, work, n, z( 1, j1 ), ldz ) + end if + ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and + ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)). + i = j1 + m + if( i<=n ) then + call stdlib_sgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, & + work, m ) + call stdlib_slacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda ) + call stdlib_sgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, & + work, m ) + call stdlib_slacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb ) + end if + i = j1 - 1 + if( i>0 ) then + call stdlib_sgemm( 'N', 'N', i, m, m, one, a( 1, j1 ), lda, ir,ldst, zero, work, & + i ) + call stdlib_slacpy( 'FULL', i, m, work, i, a( 1, j1 ), lda ) + call stdlib_sgemm( 'N', 'N', i, m, m, one, b( 1, j1 ), ldb, ir,ldst, zero, work, & + i ) + call stdlib_slacpy( 'FULL', i, m, work, i, b( 1, j1 ), ldb ) + end if + ! exit with info = 0 if swap was successfully performed. + return + end if + ! exit with info = 1 if swap was rejected. + 70 continue + info = 1 + return + end subroutine stdlib_stgex2 + + !> STGEXC: reorders the generalized real Schur decomposition of a real + !> matrix pair (A,B) using an orthogonal equivalence transformation + !> (A, B) = Q * (A, B) * Z**T, + !> so that the diagonal block of (A, B) with row index IFST is moved + !> to row ILST. + !> (A, B) must be in generalized real Schur canonical form (as returned + !> by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 + !> diagonal blocks. B is upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T + !> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T + + pure subroutine stdlib_stgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldq, ldz, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: here, lwmin, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info==0 ) then + if( n<=1 ) then + lwmin = 1 + else + lwmin = 4*n + 16 + end if + work(1) = lwmin + if (lwork1 ) then + if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, nbf, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2-by-2 block breaks into two 1-by-1 blocks. + if( nbf==2 ) then + if( a( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1-by-1 blocks, each of which + ! must be swapped individually. + nbnext = 1 + if( here>=3 ) then + if( a( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, & + nbnext, 1, work, lwork,info ) + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1-by-1 blocks. + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, & + nbnext, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + else + ! recompute nbnext in case of 2-by-2 split. + if( a( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2-by-2 block did not split. + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,& + 2, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2-by-2 block did split. + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + call stdlib_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, & + 1, 1, work, lwork, info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - 1 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + work( 1 ) = lwmin + return + end subroutine stdlib_stgexc + + !> STGSEN: reorders the generalized real Schur decomposition of a real + !> matrix pair (A, B) (in terms of an orthonormal equivalence trans- + !> formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the upper quasi-triangular + !> matrix A and the upper triangular B. The leading columns of Q and + !> Z form orthonormal bases of the corresponding left and right eigen- + !> spaces (deflating subspaces). (A, B) must be in generalized real + !> Schur canonical form (as returned by SGGES), i.e. A is block upper + !> triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper + !> triangular. + !> STGSEN also computes the generalized eigenvalues + !> w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, STGSEN computes the estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_stgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, & + beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2 + real(sp) :: dscale, dsum, eps, rdscal, smlnum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + pair = .false. + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + if( pair ) then + pair = .false. + else + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 60 + end if + if( pair )ks = ks + 1 + end if + end if + end do loop_30 + if( wantp ) then + ! solve generalized sylvester equation for r and l + ! and compute pl and pr. + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + call stdlib_slacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_slacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + call stdlib_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto left + ! and right eigenspaces. + rdscal = zero + dsum = one + call stdlib_slassq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_slassq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates of difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu-estimate. + call stdlib_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl-estimate. + call stdlib_stgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( 2*n1*n2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_slacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_slacn2( mn2, work( mn2+1 ), work, iwork, dif( 1 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_stgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_slacn2( mn2, work( mn2+1 ), work, iwork, dif( 2 ),kase, isave ) + + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation. + call stdlib_stgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_stgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( 2*n1*n2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + 60 continue + ! compute generalized eigenvalues of reordered pair (a, b) and + ! normalize the generalized schur form. + pair = .false. + loop_70: do k = 1, n + if( pair ) then + pair = .false. + else + if( k STGSJA: computes the generalized singular value decomposition (GSVD) + !> of two real upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine SGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), + !> where U, V and Q are orthogonal matrices. + !> R is a nonsingular upper triangular matrix, and D1 and D2 are + !> ``diagonal'' matrices, which are of the following structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the orthogonal transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_stgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(sp), intent(in) :: tola, tolb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + real(sp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(sp), parameter :: hugenum = huge(zero) + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(sp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & + ssmin + ! Intrinsic Functions + intrinsic :: abs,max,min,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + ! change sign if necessary + if( gamma=beta( k+i ) ) then + call stdlib_sscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_sscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l STGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B) in + !> generalized real Schur canonical form (or of any matrix pair + !> (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where + !> Z**T denotes the transpose of Z. + !> (A, B) must be in generalized real Schur form (as returned by SGGES), + !> i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal + !> blocks. B is upper triangular. + + pure subroutine stdlib_stgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + real(sp), intent(out) :: dif(*), s(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: difdri = 3 + + + ! Local Scalars + logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2 + real(sp) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, & + scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi + ! Local Arrays + real(sp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl((a11,b11), (a22, b22)). + n1 = 1 + if( work( 2 )/=zero )n1 = 2 + n2 = n - n1 + if( n2==0 ) then + dif( ks ) = cond + else + i = n*n + 1 + iz = 2*n*n + 1 + call stdlib_stgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr ) + if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond ) + end if + end if + if( pair )dif( ks+1 ) = dif( ks ) + end if + if( pair )ks = ks + 1 + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_stgsna + + !> STPLQT: computes a blocked LQ factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_stplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_stplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_stprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_stplqt + + !> STPQRT: computes a blocked QR factorization of a real + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_stpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_stpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h^h to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_stprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_stpqrt + + !> STREVC: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + + pure subroutine stdlib_strevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, n2 + real(sp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(sp) :: x(2,2) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldtjnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_slaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_sscal( ki, scale, work( 1+n ), 1 ) + work( j+n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_saxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_slaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_sscal( ki, scale, work( 1+n ), 1 ) + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_saxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_saxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_scopy( ki, work( 1+n ), 1, vr( 1, is ), 1 ) + ii = stdlib_isamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_sscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else + if( ki>1 )call stdlib_sgemv( 'N', n, ki-1, one, vr, ldvr,work( 1+n ), 1, & + work( ki+n ),vr( 1, ki ), 1 ) + ii = stdlib_isamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_sscal( n, remax, vr( 1, ki ), 1 ) + end if + else + ! complex right eigenvector. + ! initial solve + ! [ (t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i* wi)]*x = 0. + ! [ (t(ki,ki-1) t(ki,ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1+n ) = one + work( ki+n2 ) = wi / t( ki-1, ki ) + else + work( ki-1+n ) = -wi / t( ki, ki-1 ) + work( ki+n2 ) = one + end if + work( ki+n ) = zero + work( ki-1+n2 ) = zero + ! form right-hand side + do k = 1, ki - 2 + work( k+n ) = -work( ki-1+n )*t( k, ki-1 ) + work( k+n2 ) = -work( ki+n2 )*t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! (t(1:ki-2,1:ki-2) - (wr+i*wi))*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_slaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr, wi,x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( ki, scale, work( 1+n ), 1 ) + call stdlib_sscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_saxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_saxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_slaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+n ), n, wr, wi, x, 2, scale,xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( ki, scale, work( 1+n ), 1 ) + call stdlib_sscal( ki, scale, work( 1+n2 ), 1 ) + end if + work( j-1+n ) = x( 1, 1 ) + work( j+n ) = x( 2, 1 ) + work( j-1+n2 ) = x( 1, 2 ) + work( j+n2 ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_saxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+n ), 1 ) + + call stdlib_saxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+n ), 1 ) + + call stdlib_saxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+n2 ), 1 ) + + call stdlib_saxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+n2 ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_scopy( ki, work( 1+n ), 1, vr( 1, is-1 ), 1 ) + call stdlib_scopy( ki, work( 1+n2 ), 1, vr( 1, is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_sscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_sscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else + if( ki>2 ) then + call stdlib_sgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n ), 1, work( ki-& + 1+n ),vr( 1, ki-1 ), 1 ) + call stdlib_sgemv( 'N', n, ki-2, one, vr, ldvr,work( 1+n2 ), 1, work( & + ki+n2 ),vr( 1, ki ), 1 ) + else + call stdlib_sscal( n, work( ki-1+n ), vr( 1, ki-1 ), 1 ) + call stdlib_sscal( n, work( ki+n2 ), vr( 1, ki ), 1 ) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_sscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_sscal( n, remax, vr( 1, ki ), 1 ) + end if + end if + is = is - 1 + if( ip/=0 )is = is - 1 + 130 continue + if( ip==1 )ip = 0 + if( ip==-1 )ip = 1 + end do loop_140 + end if + if( leftv ) then + ! compute left eigenvectors. + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==-1 )go to 250 + if( ki==n )go to 150 + if( t( ki+1, ki )==zero )go to 150 + ip = 1 + 150 continue + if( somev ) then + if( .not.select( ki ) )go to 250 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! real left eigenvector. + work( ki+n ) = one + ! form right-hand side + do k = ki + 1, n + work( k+n ) = -t( ki, k ) + end do + ! solve the quasi-triangular system: + ! (t(ki+1:n,ki+1:n) - wr)**t*x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_sdot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + ! solve (t(j,j)-wr)**t*x = work + call stdlib_slaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_sscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + vmax = max( abs( work( j+n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_sdot( j-ki-1, t( ki+1, j ), 1,work( & + ki+1+n ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_sdot( j-ki-1, t( ki+1, j+1 ), 1,& + work( ki+1+n ), 1 ) + ! solve + ! [t(j,j)-wr t(j,j+1) ]**t* x = scale*( work1 ) + ! [t(j+1,j) t(j+1,j+1)-wr] ( work2 ) + call stdlib_slaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_sscal( n-ki+1, scale, work( ki+n ), 1 ) + + work( j+n ) = x( 1, 1 ) + work( j+1+n ) = x( 2, 1 ) + vmax = max( abs( work( j+n ) ),abs( work( j+1+n ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_scopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + ii = stdlib_isamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_sscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki+n ) = wi / t( ki, ki+1 ) + work( ki+1+n2 ) = one + else + work( ki+n ) = one + work( ki+1+n2 ) = -wi / t( ki+1, ki ) + end if + work( ki+1+n ) = zero + work( ki+n2 ) = zero + ! form right-hand side + do k = ki + 2, n + work( k+n ) = -work( ki+n )*t( ki, k ) + work( k+n2 ) = -work( ki+1+n2 )*t( ki+1, k ) + end do + ! solve complex quasi-triangular system: + ! ( t(ki+2,n:ki+2,n) - (wr-i*wi) )*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_sscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_sdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_sdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + ! solve (t(j,j)-(wr-i*wi))*(x11+i*x12)= wk+i*wk2 + call stdlib_slaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_sscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + vmax = max( abs( work( j+n ) ),abs( work( j+n2 ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+n ), 1 ) + call stdlib_sscal( n-ki+1, rec, work( ki+n2 ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+n ) = work( j+n ) -stdlib_sdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n ), 1 ) + work( j+n2 ) = work( j+n2 ) -stdlib_sdot( j-ki-2, t( ki+2, j ), 1,work( & + ki+2+n2 ), 1 ) + work( j+1+n ) = work( j+1+n ) -stdlib_sdot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n ), 1 ) + work( j+1+n2 ) = work( j+1+n2 ) -stdlib_sdot( j-ki-2, t( ki+2, j+1 ), 1,& + work( ki+2+n2 ), 1 ) + ! solve 2-by-2 complex linear equation + ! ([t(j,j) t(j,j+1) ]**t-(wr-i*wi)*i)*x = scale*b + ! ([t(j+1,j) t(j+1,j+1)] ) + call stdlib_slaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( n-ki+1, scale, work( ki+n ), 1 ) + call stdlib_sscal( n-ki+1, scale, work( ki+n2 ), 1 ) + end if + work( j+n ) = x( 1, 1 ) + work( j+n2 ) = x( 1, 2 ) + work( j+1+n ) = x( 2, 1 ) + work( j+1+n2 ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ), vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + call stdlib_scopy( n-ki+1, work( ki+n ), 1, vl( ki, is ), 1 ) + call stdlib_scopy( n-ki+1, work( ki+n2 ), 1, vl( ki, is+1 ),1 ) + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_sscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_sscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else + if( ki STREVC3: computes some or all of the right and/or left eigenvectors of + !> a real upper quasi-triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**T)*T = w*(y**T) + !> where y**T denotes the transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal blocks of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the orthogonal factor that reduces a matrix + !> A to Schur form T, then Q*X and Q*Y are the matrices of right and + !> left eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_strevc3( side, howmny, select, n, t, ldt, vl, ldvl,vr, ldvr, mm, m, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + real(sp), intent(in) :: t(ldt,*) + real(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, pair, rightv, somev + integer(ilp) :: i, ierr, ii, ip, is, j, j1, j2, jnxt, k, ki, iv, maxwrk, nb, & + ki2 + real(sp) :: beta, bignum, emax, ovfl, rec, remax, scale, smin, smlnum, ulp, unfl, & + vcrit, vmax, wi, wr, xnorm + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Local Arrays + real(sp) :: x(2,2) + integer(ilp) :: iscomplex(nbmax) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + info = 0 + nb = stdlib_ilaenv( 1, 'STREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + lquery = ( lwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_slaset( 'F', n, 1+2*nb, zero, zero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_slabad( unfl, ovfl ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + bignum = ( one-ulp ) / smlnum + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + work( 1 ) = zero + do j = 2, n + work( j ) = zero + do i = 1, j - 1 + work( j ) = work( j ) + abs( t( i, j ) ) + end do + end do + ! index ip is used to specify the real or complex eigenvalue: + ! ip = 0, real eigenvalue, + ! 1, first of conjugate complex pair: (wr,wi) + ! -1, second of conjugate complex pair: (wr,wi) + ! iscomplex array stores ip for each column in current block. + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! for complex right vector, uses iv-1 for real part and iv for complex part. + ! non-blocked version always uses iv=2; + ! blocked version starts with iv=nb, goes down to 1 or 2. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 2 + if( nb>2 ) then + iv = nb + end if + ip = 0 + is = m + loop_140: do ki = n, 1, -1 + if( ip==-1 ) then + ! previous iteration (ki+1) was second of conjugate pair, + ! so this ki is first of conjugate pair; skip to end of loop + ip = 1 + cycle loop_140 + else if( ki==1 ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki, ki-1 )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is second of conjugate pair + ip = -1 + end if + if( somev ) then + if( ip==0 ) then + if( .not.select( ki ) )cycle loop_140 + else + if( .not.select( ki-1 ) )cycle loop_140 + end if + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki-1 ) ) )*sqrt( abs( t( ki-1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real right eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-1,1:ki-1) - wr ]*x = scale*work. + jnxt = ki - 1 + loop_60: do j = ki - 1, 1, -1 + if( j>jnxt )cycle loop_60 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_slaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_sscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + ! update right-hand side + call stdlib_saxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_slaln2( .false., 2, 1, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+iv*n ), n, wr, zero, x, 2,scale, xnorm, ierr ) + ! scale x(1,1) and x(2,1) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 2, 1 ) = x( 2, 1 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one )call stdlib_sscal( ki, scale, work( 1+iv*n ), 1 ) + + work( j-1+iv*n ) = x( 1, 1 ) + work( j +iv*n ) = x( 2, 1 ) + ! update right-hand side + call stdlib_saxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+iv*n ), 1 ) + + call stdlib_saxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+iv*n ), 1 ) + + end if + end do loop_60 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_scopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_isamax( ki, vr( 1, is ), 1 ) + remax = one / abs( vr( ii, is ) ) + call stdlib_sscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_sgemv( 'N', n, ki-1, one, vr, ldvr,work( 1 + iv*n ), & + 1, work( ki + iv*n ),vr( 1, ki ), 1 ) + ii = stdlib_isamax( n, vr( 1, ki ), 1 ) + remax = one / abs( vr( ii, ki ) ) + call stdlib_sscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = zero + end do + iscomplex( iv ) = ip + ! back-transform and normalization is done below + end if + else + ! -------------------------------------------------------- + ! complex right eigenvector. + ! initial solve + ! [ ( t(ki-1,ki-1) t(ki-1,ki) ) - (wr + i*wi) ]*x = 0. + ! [ ( t(ki, ki-1) t(ki, ki) ) ] + if( abs( t( ki-1, ki ) )>=abs( t( ki, ki-1 ) ) ) then + work( ki-1 + (iv-1)*n ) = one + work( ki + (iv )*n ) = wi / t( ki-1, ki ) + else + work( ki-1 + (iv-1)*n ) = -wi / t( ki, ki-1 ) + work( ki + (iv )*n ) = one + end if + work( ki + (iv-1)*n ) = zero + work( ki-1 + (iv )*n ) = zero + ! form right-hand side. + do k = 1, ki - 2 + work( k+(iv-1)*n ) = -work( ki-1+(iv-1)*n )*t(k,ki-1) + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(k,ki ) + end do + ! solve upper quasi-triangular system: + ! [ t(1:ki-2,1:ki-2) - (wr+i*wi) ]*x = scale*(work+i*work2) + jnxt = ki - 2 + loop_90: do j = ki - 2, 1, -1 + if( j>jnxt )cycle loop_90 + j1 = j + j2 = j + jnxt = j - 1 + if( j>1 ) then + if( t( j, j-1 )/=zero ) then + j1 = j - 1 + jnxt = j - 2 + end if + end if + if( j1==j2 ) then + ! 1-by-1 diagonal block + call stdlib_slaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+(iv-1)*n ), n,wr, wi, x, 2, scale, xnorm, ierr ) + ! scale x(1,1) and x(1,2) to avoid overflow when + ! updating the right-hand side. + if( xnorm>one ) then + if( work( j )>bignum / xnorm ) then + x( 1, 1 ) = x( 1, 1 ) / xnorm + x( 1, 2 ) = x( 1, 2 ) / xnorm + scale = scale / xnorm + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_sscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j+(iv-1)*n ) = x( 1, 1 ) + work( j+(iv )*n ) = x( 1, 2 ) + ! update the right-hand side + call stdlib_saxpy( j-1, -x( 1, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), 1 ) + + call stdlib_saxpy( j-1, -x( 1, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + else + ! 2-by-2 diagonal block + call stdlib_slaln2( .false., 2, 2, smin, one,t( j-1, j-1 ), ldt, one, & + one,work( j-1+(iv-1)*n ), n, wr, wi, x, 2,scale, xnorm, ierr ) + ! scale x to avoid overflow when updating + ! the right-hand side. + if( xnorm>one ) then + beta = max( work( j-1 ), work( j ) ) + if( beta>bignum / xnorm ) then + rec = one / xnorm + x( 1, 1 ) = x( 1, 1 )*rec + x( 1, 2 ) = x( 1, 2 )*rec + x( 2, 1 ) = x( 2, 1 )*rec + x( 2, 2 ) = x( 2, 2 )*rec + scale = scale*rec + end if + end if + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( ki, scale, work( 1+(iv-1)*n ), 1 ) + call stdlib_sscal( ki, scale, work( 1+(iv )*n ), 1 ) + end if + work( j-1+(iv-1)*n ) = x( 1, 1 ) + work( j +(iv-1)*n ) = x( 2, 1 ) + work( j-1+(iv )*n ) = x( 1, 2 ) + work( j +(iv )*n ) = x( 2, 2 ) + ! update the right-hand side + call stdlib_saxpy( j-2, -x( 1, 1 ), t( 1, j-1 ), 1,work( 1+(iv-1)*n ),& + 1 ) + call stdlib_saxpy( j-2, -x( 2, 1 ), t( 1, j ), 1,work( 1+(iv-1)*n ), & + 1 ) + call stdlib_saxpy( j-2, -x( 1, 2 ), t( 1, j-1 ), 1,work( 1+(iv )*n ), & + 1 ) + call stdlib_saxpy( j-2, -x( 2, 2 ), t( 1, j ), 1,work( 1+(iv )*n ), 1 ) + + end if + end do loop_90 + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_scopy( ki, work( 1+(iv-1)*n ), 1, vr(1,is-1), 1 ) + call stdlib_scopy( ki, work( 1+(iv )*n ), 1, vr(1,is ), 1 ) + emax = zero + do k = 1, ki + emax = max( emax, abs( vr( k, is-1 ) )+abs( vr( k, is ) ) ) + end do + remax = one / emax + call stdlib_sscal( ki, remax, vr( 1, is-1 ), 1 ) + call stdlib_sscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is-1 ) = zero + vr( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>2 ) then + call stdlib_sgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv-1)*n ), & + 1,work( ki-1 + (iv-1)*n ), vr(1,ki-1), 1) + call stdlib_sgemv( 'N', n, ki-2, one, vr, ldvr,work( 1 + (iv)*n ), 1,& + work( ki + (iv)*n ), vr( 1, ki ), 1 ) + else + call stdlib_sscal( n, work(ki-1+(iv-1)*n), vr(1,ki-1), 1) + call stdlib_sscal( n, work(ki +(iv )*n), vr(1,ki ), 1) + end if + emax = zero + do k = 1, n + emax = max( emax, abs( vr( k, ki-1 ) )+abs( vr( k, ki ) ) ) + end do + remax = one / emax + call stdlib_sscal( n, remax, vr( 1, ki-1 ), 1 ) + call stdlib_sscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + (iv-1)*n ) = zero + work( k + (iv )*n ) = zero + end do + iscomplex( iv-1 ) = -ip + iscomplex( iv ) = ip + iv = iv - 1 + ! back-transform and normalization is done below + end if + end if + if( nb>1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki-1 and ki) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki - 1 + end if + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv<=2) .or. (ki2==1) ) then + call stdlib_sgemm( 'N', 'N', n, nb-iv+1, ki2+nb-iv, one,vr, ldvr,work( 1 + & + (iv)*n ), n,zero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + if( iscomplex(k)==0 ) then + ! real eigenvector + ii = stdlib_isamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1 ) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_sscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_slacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki2 ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if ! blocked back-transform + is = is - 1 + if( ip/=0 )is = is - 1 + end do loop_140 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! for complex left vector, uses iv for real part and iv+1 for complex part. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb-1 or nb. + ! (note the "0-th" column is used for 1-norms computed above.) + iv = 1 + ip = 0 + is = 1 + loop_260: do ki = 1, n + if( ip==1 ) then + ! previous iteration (ki-1) was first of conjugate pair, + ! so this ki is second of conjugate pair; skip to end of loop + ip = -1 + cycle loop_260 + else if( ki==n ) then + ! last column, so this ki must be real eigenvalue + ip = 0 + else if( t( ki+1, ki )==zero ) then + ! zero on sub-diagonal, so this ki is real eigenvalue + ip = 0 + else + ! non-zero on sub-diagonal, so this ki is first of conjugate pair + ip = 1 + end if + if( somev ) then + if( .not.select( ki ) )cycle loop_260 + end if + ! compute the ki-th eigenvalue (wr,wi). + wr = t( ki, ki ) + wi = zero + if( ip/=0 )wi = sqrt( abs( t( ki, ki+1 ) ) )*sqrt( abs( t( ki+1, ki ) ) ) + smin = max( ulp*( abs( wr )+abs( wi ) ), smlnum ) + if( ip==0 ) then + ! -------------------------------------------------------- + ! real left eigenvector + work( ki + iv*n ) = one + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -t( ki, k ) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+1:n,ki+1:n) - wr ]**t * x = scale*work + vmax = one + vcrit = bignum + jnxt = ki + 1 + loop_170: do j = ki + 1, n + if( jvcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_sdot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + ! solve [ t(j,j) - wr ]**t * x = work + call stdlib_slaln2( .false., 1, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_sscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j+iv*n ) = x( 1, 1 ) + vmax = max( abs( work( j+iv*n ) ), vmax ) + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work( ki+iv*n ), 1 ) + vmax = one + vcrit = bignum + end if + work( j+iv*n ) = work( j+iv*n ) -stdlib_sdot( j-ki-1, t( ki+1, j ), 1,& + work( ki+1+iv*n ), 1 ) + work( j+1+iv*n ) = work( j+1+iv*n ) -stdlib_sdot( j-ki-1, t( ki+1, j+1 )& + , 1,work( ki+1+iv*n ), 1 ) + ! solve + ! [ t(j,j)-wr t(j,j+1) ]**t * x = scale*( work1 ) + ! [ t(j+1,j) t(j+1,j+1)-wr ] ( work2 ) + call stdlib_slaln2( .true., 2, 1, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,zero, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one )call stdlib_sscal( n-ki+1, scale, work( ki+iv*n ), 1 ) + + work( j +iv*n ) = x( 1, 1 ) + work( j+1+iv*n ) = x( 2, 1 ) + vmax = max( abs( work( j +iv*n ) ),abs( work( j+1+iv*n ) ), vmax ) + + vcrit = bignum / vmax + end if + end do loop_170 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_scopy( n-ki+1, work( ki + iv*n ), 1,vl( ki, is ), 1 ) + ii = stdlib_isamax( n-ki+1, vl( ki, is ), 1 ) + ki - 1 + remax = one / abs( vl( ii, is ) ) + call stdlib_sscal( n-ki+1, remax, vl( ki, is ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki=abs( t( ki+1, ki ) ) ) then + work( ki + (iv )*n ) = wi / t( ki, ki+1 ) + work( ki+1 + (iv+1)*n ) = one + else + work( ki + (iv )*n ) = one + work( ki+1 + (iv+1)*n ) = -wi / t( ki+1, ki ) + end if + work( ki+1 + (iv )*n ) = zero + work( ki + (iv+1)*n ) = zero + ! form right-hand side. + do k = ki + 2, n + work( k+(iv )*n ) = -work( ki +(iv )*n )*t(ki, k) + work( k+(iv+1)*n ) = -work( ki+1+(iv+1)*n )*t(ki+1,k) + end do + ! solve transposed quasi-triangular system: + ! [ t(ki+2:n,ki+2:n)**t - (wr-i*wi) ]*x = work1+i*work2 + vmax = one + vcrit = bignum + jnxt = ki + 2 + loop_200: do j = ki + 2, n + if( jvcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_sscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j+(iv )*n ) = work( j+(iv)*n ) -stdlib_sdot( j-ki-2, t( ki+2, j )& + , 1,work( ki+2+(iv)*n ), 1 ) + work( j+(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_sdot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve [ t(j,j)-(wr-i*wi) ]*(x11+i*x12)= wk+i*wk2 + call stdlib_slaln2( .false., 1, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_sscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j+(iv )*n ) = x( 1, 1 ) + work( j+(iv+1)*n ) = x( 1, 2 ) + vmax = max( abs( work( j+(iv )*n ) ),abs( work( j+(iv+1)*n ) ), vmax ) + + vcrit = bignum / vmax + else + ! 2-by-2 diagonal block + ! scale if necessary to avoid overflow when forming + ! the right-hand side elements. + beta = max( work( j ), work( j+1 ) ) + if( beta>vcrit ) then + rec = one / vmax + call stdlib_sscal( n-ki+1, rec, work(ki+(iv )*n), 1 ) + call stdlib_sscal( n-ki+1, rec, work(ki+(iv+1)*n), 1 ) + vmax = one + vcrit = bignum + end if + work( j +(iv )*n ) = work( j+(iv)*n ) -stdlib_sdot( j-ki-2, t( ki+2, & + j ), 1,work( ki+2+(iv)*n ), 1 ) + work( j +(iv+1)*n ) = work( j+(iv+1)*n ) -stdlib_sdot( j-ki-2, t( ki+2,& + j ), 1,work( ki+2+(iv+1)*n ), 1 ) + work( j+1+(iv )*n ) = work( j+1+(iv)*n ) -stdlib_sdot( j-ki-2, t( ki+2,& + j+1 ), 1,work( ki+2+(iv)*n ), 1 ) + work( j+1+(iv+1)*n ) = work( j+1+(iv+1)*n ) -stdlib_sdot( j-ki-2, t( ki+& + 2, j+1 ), 1,work( ki+2+(iv+1)*n ), 1 ) + ! solve 2-by-2 complex linear equation + ! [ (t(j,j) t(j,j+1) )**t - (wr-i*wi)*i ]*x = scale*b + ! [ (t(j+1,j) t(j+1,j+1)) ] + call stdlib_slaln2( .true., 2, 2, smin, one, t( j, j ),ldt, one, one, & + work( j+iv*n ), n, wr,-wi, x, 2, scale, xnorm, ierr ) + ! scale if necessary + if( scale/=one ) then + call stdlib_sscal( n-ki+1, scale, work(ki+(iv )*n), 1) + call stdlib_sscal( n-ki+1, scale, work(ki+(iv+1)*n), 1) + end if + work( j +(iv )*n ) = x( 1, 1 ) + work( j +(iv+1)*n ) = x( 1, 2 ) + work( j+1+(iv )*n ) = x( 2, 1 ) + work( j+1+(iv+1)*n ) = x( 2, 2 ) + vmax = max( abs( x( 1, 1 ) ), abs( x( 1, 2 ) ),abs( x( 2, 1 ) ), abs( x(& + 2, 2 ) ),vmax ) + vcrit = bignum / vmax + end if + end do loop_200 + ! copy the vector x or q*x to vl and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vl and normalize. + call stdlib_scopy( n-ki+1, work( ki + (iv )*n ), 1,vl( ki, is ), 1 ) + + call stdlib_scopy( n-ki+1, work( ki + (iv+1)*n ), 1,vl( ki, is+1 ), 1 ) + + emax = zero + do k = ki, n + emax = max( emax, abs( vl( k, is ) )+abs( vl( k, is+1 ) ) ) + end do + remax = one / emax + call stdlib_sscal( n-ki+1, remax, vl( ki, is ), 1 ) + call stdlib_sscal( n-ki+1, remax, vl( ki, is+1 ), 1 ) + do k = 1, ki - 1 + vl( k, is ) = zero + vl( k, is+1 ) = zero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki1 ) then + ! -------------------------------------------------------- + ! blocked version of back-transform + ! for complex case, ki2 includes both vectors (ki and ki+1) + if( ip==0 ) then + ki2 = ki + else + ki2 = ki + 1 + end if + ! columns 1:iv of work are valid vectors. + ! when the number of vectors stored reaches nb-1 or nb, + ! or if this was last vector, do the gemm + if( (iv>=nb-1) .or. (ki2==n) ) then + call stdlib_sgemm( 'N', 'N', n, iv, n-ki2+iv, one,vl( 1, ki2-iv+1 ), ldvl,& + work( ki2-iv+1 + (1)*n ), n,zero,work( 1 + (nb+1)*n ), n ) + ! normalize vectors + do k = 1, iv + if( iscomplex(k)==0) then + ! real eigenvector + ii = stdlib_isamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / abs( work( ii + (nb+k)*n ) ) + else if( iscomplex(k)==1) then + ! first eigenvector of conjugate pair + emax = zero + do ii = 1, n + emax = max( emax,abs( work( ii + (nb+k )*n ) )+abs( work( ii + (& + nb+k+1)*n ) ) ) + end do + remax = one / emax + ! else if iscomplex(k)==-1 + ! second eigenvector of conjugate pair + ! reuse same remax as previous k + end if + call stdlib_sscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_slacpy( 'F', n, iv,work( 1 + (nb+1)*n ), n,vl( 1, ki2-iv+1 ), & + ldvl ) + iv = 1 + else + iv = iv + 1 + end if + end if ! blocked back-transform + is = is + 1 + if( ip/=0 )is = is + 1 + end do loop_260 + end if + return + end subroutine stdlib_strevc3 + + !> STRSYL: solves the real Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**T, and A and B are both upper quasi- + !> triangular. A is M-by-M and B is N-by-N; the right hand side C and + !> the solution X are M-by-N; and scale is an output scale factor, set + !> <= 1 to avoid overflow in X. + !> A and B must be in Schur canonical form (as returned by SHSEQR), that + !> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; + !> each 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_strsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(sp), intent(out) :: scale + ! Array Arguments + real(sp), intent(in) :: a(lda,*), b(ldb,*) + real(sp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: ierr, j, k, k1, k2, knext, l, l1, l2, lnext + real(sp) :: a11, bignum, da11, db, eps, scaloc, sgn, smin, smlnum, suml, sumr, & + xnorm + ! Local Arrays + real(sp) :: dum(1), vec(2,2), x(2,2) + ! Intrinsic Functions + intrinsic :: abs,max,min,real + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'T' ) .and. .not.stdlib_lsame( trana, & + 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'T' ) .and. .not.stdlib_lsame( & + tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaknext )cycle loop_60 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_sdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_slaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_sdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_sdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_slaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_slasy2( .false., .false., isgn, 2, 2,a( k1, k1 ), lda, b( l1, & + l1 ), ldb, vec,2, scaloc, x, 2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_60 + end do loop_70 + else if( .not.notrna .and. notrnb ) then + ! solve a**t *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 l-1 + ! r(k,l) = sum [a(i,k)**t*x(i,l)] +isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = 1 + loop_130: do l = 1, n + if( lone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_slaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_slaln2( .true., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k1, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l1 ), 1 ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( l1-1, c( k2, 1 ), ldc, b( 1, l2 ), 1 ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_slasy2( .true., .false., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_120 + end do loop_130 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**t*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! top-right corner column by column by + ! a(k,k)**t*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! k-1 n + ! r(k,l) = sum [a(i,k)**t*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_190: do l = n, 1, -1 + if( l>lnext )cycle loop_190 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = 1 + loop_180: do k = 1, m + if( kone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_slaln2( .true., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_slaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k1 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( k1-1, a( 1, k2 ), 1, c( 1, l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min(l2+1, n )& + ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_slasy2( .true., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, l1 & + ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_180 + end do loop_190 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**t = scale*c. + ! the (k,l)th block of x is determined starting from + ! bottom-right corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b(l,l)**t = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b(l,j)**t]. + ! i=k+1 j=l+1 + ! start column loop (index = l) + ! l1 (l2): column index of the first (last) row of x(k,l) + lnext = n + loop_250: do l = n, 1, -1 + if( l>lnext )cycle loop_250 + if( l==1 ) then + l1 = l + l2 = l + else + if( b( l, l-1 )/=zero ) then + l1 = l - 1 + l2 = l + lnext = l - 2 + else + l1 = l + l2 = l + lnext = l - 1 + end if + end if + ! start row loop (index = k) + ! k1 (k2): row index of the first (last) row of x(k,l) + knext = m + loop_240: do k = m, 1, -1 + if( k>knext )cycle loop_240 + if( k==1 ) then + k1 = k + k2 = k + else + if( a( k, k-1 )/=zero ) then + k1 = k - 1 + k2 = k + knext = k - 2 + else + k1 = k + k2 = k + knext = k - 1 + end if + end if + if( l1==l2 .and. k1==k2 ) then + suml = stdlib_sdot( m-k1, a( k1, min(k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l1, c( k1, min( l1+1, n ) ), ldc,b( l1, min( l1+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + scaloc = one + a11 = a( k1, k1 ) + sgn*b( l1, l1 ) + da11 = abs( a11 ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( vec( 1, 1 ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x( 1, 1 ) = ( vec( 1, 1 )*scaloc ) / a11 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + else if( l1==l2 .and. k1/=k2 ) then + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + call stdlib_slaln2( .false., 2, 1, smin, one, a( k1, k1 ),lda, one, one, & + vec, 2, -sgn*b( l1, l1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k2, l1 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1==k2 ) then + suml = stdlib_sdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = sgn*( c( k1, l1 )-( suml+sgn*sumr ) ) + suml = stdlib_sdot( m-k1, a( k1, min( k1+1, m ) ), lda,c( min( k1+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = sgn*( c( k1, l2 )-( suml+sgn*sumr ) ) + call stdlib_slaln2( .false., 2, 1, smin, one, b( l1, l1 ),ldb, one, one, & + vec, 2, -sgn*a( k1, k1 ),zero, x, 2, scaloc, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 2, 1 ) + else if( l1/=l2 .and. k1/=k2 ) then + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 1, 1 ) = c( k1, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k1, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k1, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 1, 2 ) = c( k1, l2 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l1 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l1, min( l2+1, n & + ) ), ldb ) + vec( 2, 1 ) = c( k2, l1 ) - ( suml+sgn*sumr ) + suml = stdlib_sdot( m-k2, a( k2, min( k2+1, m ) ), lda,c( min( k2+1, m ), & + l2 ), 1 ) + sumr = stdlib_sdot( n-l2, c( k2, min( l2+1, n ) ), ldc,b( l2, min( l2+1, n & + ) ), ldb ) + vec( 2, 2 ) = c( k2, l2 ) - ( suml+sgn*sumr ) + call stdlib_slasy2( .false., .true., isgn, 2, 2, a( k1, k1 ),lda, b( l1, & + l1 ), ldb, vec, 2, scaloc, x,2, xnorm, ierr ) + if( ierr/=0 )info = 1 + if( scaloc/=one ) then + do j = 1, n + call stdlib_sscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k1, l1 ) = x( 1, 1 ) + c( k1, l2 ) = x( 1, 2 ) + c( k2, l1 ) = x( 2, 1 ) + c( k2, l2 ) = x( 2, 2 ) + end if + end do loop_240 + end do loop_250 + end if + return + end subroutine stdlib_strsyl + + !> SGEBRD: reduces a general real M-by-N matrix A to upper or lower + !> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_sgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'SGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=sp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+nb-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_slabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update + ! of the form a := a - v*y**t - x*u**t + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& + nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) + call stdlib_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_sgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_sgebrd + + !> SGEHRD: reduces a real general matrix A to upper Hessenberg form H by + !> an orthogonal similarity transformation: Q**T * A * Q = H . + + pure subroutine stdlib_sgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + real(sp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin + tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**t + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_slahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**t. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = one + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE',ihi, ihi-i-ib+1,ib, -one, work, & + ldwork, a( i+ib, i ), lda, one,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE','UNIT', i, ib-1,one, a( i+1, i )& + , lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_saxpy( i, -one, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_slarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, n-i-ib+1, & + ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_sgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_sgehrd + + !> DGELQT computes a blocked LQ factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_sgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. ( mb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda SGELS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, or its transpose, using a QR or LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an underdetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_sgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(sp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(sp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'SORMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'SORMQR', 'LT', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'SGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'SORMLQ', 'LT', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'SORMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn + max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=sp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'SGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_slaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_slamch( 'S' ) / stdlib_slamch( 'P' ) + bignum = one / smlnum + call stdlib_slabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_slange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_slange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_sgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_sormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_strtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_sormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_sgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_strtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_sormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb, & + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_sormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_slascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_slascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_slascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_slascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=sp) + return + end subroutine stdlib_sgels + + !> SGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by short wide LQ + !> factorization (SGELQ) + + pure subroutine stdlib_sgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(sp), intent(in) :: a(lda,*), t(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_sgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_slamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = real( lw,KIND=sp) + return + end subroutine stdlib_sgemlq + + !> SGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> where Q is a real orthogonal matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (SGEQR) + + pure subroutine stdlib_sgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + real(sp), intent(in) :: a(lda,*), t(*) + real(sp), intent(inout) :: c(ldc,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'T' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_sgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_sgemqr + + !> SGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_sgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_sgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_sgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb SGEQRT: computes a blocked QR factorization of a real M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_sgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda SGESV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_sgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda SGESVX: uses the LU factorization to compute the solution to a real + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_sgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*), c(*), r(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(sp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_slantr( 'M', 'U', 'N', info, info, af, ldaf,work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_slange( 'M', n, info, a, lda, work ) / rpvgrw + end if + work( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_slange( norm, n, n, a, lda, work ) + rpvgrw = stdlib_slantr( 'M', 'U', 'N', n, n, af, ldaf, work ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_slange( 'M', n, n, a, lda, work ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_sgecon( norm, n, af, ldaf, anorm, rcond, work, iwork, info ) + ! compute the solution matrix x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_sgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_sgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SGGES: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> SGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_sgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alphar, & + alphai, beta, vsl, ldvsl, vsr,ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_s) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, maxwrk, minwrk + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( lda0 )then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'SGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'SORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'SORGQR', ' ', n, 1, n, & + -1 ) ) + end if + else + minwrk = 1 + maxwrk = 1 + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n space for storing balancing factors) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_sggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_sorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_slaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_sgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_shgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 40 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: need 4*n+16 ) + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_stgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_sggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_sggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl )then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i )/safmax )>( anrmto/anrm ) .or.( safmin/alphar( i ) )>( & + anrm/anrmto ) ) then + work( 1 ) = abs( a( i, i )/alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i )/safmax )>( anrmto/anrm ) .or.( safmin/alphai( i ) )>( & + anrm/anrmto ) ) then + work( 1 ) = abs( a( i, i+1 )/alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl )then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i )/safmax )>( bnrmto/bnrm ) .or.( safmin/beta( i ) )>( & + bnrm/bnrmto ) ) then + work( 1 ) = abs(b( i, i )/beta( i )) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_slascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_slascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 40 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_sgges + + !> SGGESX: computes for a pair of N-by-N real nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the real Schur form (S,T), and, + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_sggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, & + alphar, alphai, beta, vsl, ldvsl,vsr, ldvsr, rconde, rcondv, work, lwork, iwork,liwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), rconde(2), rcondv(2), vsl(& + ldvsl,*), vsr(ldvsr,*), work(*) + ! Function Arguments + procedure(stdlib_selctg_s) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, wantsb, & + wantse, wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, ip, iright, & + irows, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, safmax, safmin, & + smlnum + ! Local Arrays + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = max( 8*n, 6*n + 16 ) + maxwrk = minwrk - n +n*stdlib_ilaenv( 1, 'SGEQRF', ' ', n, 1, n, 0 ) + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'SORMQR', ' ', n, 1, n, -1 & + ) ) + if( ilvsl ) then + maxwrk = max( maxwrk, minwrk - n +n*stdlib_ilaenv( 1, 'SORGQR', ' ', n, 1, n, & + -1 ) ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 6 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need 6*n + 2*n for permutation parameters) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_sggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_sorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_slaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_sgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (workspace: need n) + iwrk = itau + call stdlib_shgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 50 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + ! (workspace: if ijob >= 1, need max( 8*(n+1), 2*sdim*(n-sdim) ) + ! otherwise, need 8*(n+1) ) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + call stdlib_stgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,sdim, pl, pr, dif, work( iwrk ), lwork-iwrk+1,iwork, & + liwork, ierr ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-22 ) then + ! not enough real workspace + info = -22 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_sggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_sggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i ) / safmax )>( anrmto / anrm ) .or.( safmin / alphar( i ) )>( & + anrm / anrmto ) )then + work( 1 ) = abs( a( i, i ) / alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i ) / safmax )>( anrmto / anrm ).or. ( safmin / alphai( i )& + )>( anrm / anrmto ) )then + work( 1 ) = abs( a( i, i+1 ) / alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl ) then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i ) / safmax )>( bnrmto / bnrm ) .or.( safmin / beta( i ) )>( & + bnrm / bnrmto ) ) then + work( 1 ) = abs( b( i, i ) / beta( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_slascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_slascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 50 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_sggesx + + !> SGGEV: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_sggev( jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai,beta, vl, ldvl, vr, & + ldvr, work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, maxwrk, minwrk + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (workspace: need 6*n) + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_sggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (workspace: need n, prefer n*nb) + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_sorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_slaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_sgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_sgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_shgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + ! (workspace: need 6*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_stgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_sggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) SGGEVX: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_sggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, ilo,ihi, lscale, rscale, abnrm, bbnrm, rconde,rcondv, work, lwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(sp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), lscale(*), rconde(*), rcondv(*)& + , rscale(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, pair, wantsb, wantse, & + wantsn, wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk, mm + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( noscl .or. stdlib_lsame( balanc, 'S' ) .or.stdlib_lsame( balanc, 'B' ) ) ) & + then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_sggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,work, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_slange( '1', n, n, a, lda, work( 1 ) ) + if( ilascl ) then + work( 1 ) = abnrm + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, 1, 1, work( 1 ), 1,ierr ) + abnrm = work( 1 ) + end if + bbnrm = stdlib_slange( '1', n, n, b, ldb, work( 1 ) ) + if( ilbscl ) then + work( 1 ) = bbnrm + call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, work( 1 ), 1,ierr ) + bbnrm = work( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to a + ! (workspace: need n, prefer n*nb) + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_sorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_slaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_sgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_sgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (workspace: need n) + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_shgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr, work,lwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 130 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! (workspace: stdlib_stgevc: need 6*n + ! stdlib_stgsna: need 2*n*(n+2)+16 if sense = 'v' or 'b', + ! need n otherwise ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_stgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 130 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_stgevc) and estimate condition + ! numbers (stdlib_stgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to recalculate + ! eigenvectors and estimate one condition numbers at a time. + pair = .false. + loop_20: do i = 1, n + if( pair ) then + pair = .false. + cycle loop_20 + end if + mm = 1 + if( i SGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_sggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + real(sp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_scopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = zero + end do + ! update d1 = d1 - t12*y2 + call stdlib_sgemv( 'NO TRANSPOSE', m, n-m, -one, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1, & + one, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_scopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**t *y + call stdlib_sormrq( 'LEFT', 'TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), ldb, work( & + m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_sggglm + + !> SGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_sgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + real(sp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_scopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_sgemv( 'NO TRANSPOSE', n-p, p, -one, a( 1, n-p+1 ), lda,d, 1, one, c, 1 & + ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_scopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_sgemv( 'NO TRANSPOSE', nr, n-m, -one, a( n-p+1, m+1 ),lda, d( & + nr+1 ), 1, one, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_strmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_saxpy( nr, -one, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**t*x + call stdlib_sormrq( 'LEFT', 'TRANSPOSE', n, 1, p, b, ldb, work( 1 ), x,n, work( p+mn+1 & + ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_sgglse + + !> SHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a real upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_shsein( side, eigsrc, initv, select, n, h, ldh, wr, wi,vl, ldvl, vr, ldvr, & + mm, m, work, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(inout) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(sp), intent(in) :: h(ldh,*), wi(*) + real(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), wr(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, pair, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ksi, ksr, ldwork + real(sp) :: bignum, eps3, hnorm, smlnum, ulp, unfl, wki, wkr + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors, and standardize the array select. + m = 0 + pair = .false. + do k = 1, n + if( pair ) then + pair = .false. + select( k ) = .false. + else + if( wi( k )==zero ) then + if( select( k ) )m = m + 1 + else + pair = .true. + if( select( k ) .or. select( k+1 ) ) then + select( k ) = .true. + m = m + 2 + end if + end if + end if + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==zero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_slanhs( 'I', kr-kl+1, h( kl, kl ), ldh, work ) + if( stdlib_sisnan( hnorm ) ) then + info = -6 + return + else if( hnorm>zero ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wkr = wr( k ) + wki = wi( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. abs( wr( i )-wkr )+abs( wi( i )-wki )0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifaill( ksr ) = k + ifaill( ksi ) = k + else + ifaill( ksr ) = 0 + ifaill( ksi ) = 0 + end if + do i = 1, kl - 1 + vl( i, ksr ) = zero + end do + if( pair ) then + do i = 1, kl - 1 + vl( i, ksi ) = zero + end do + end if + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_slaein( .true., noinit, kr, h, ldh, wkr, wki,vr( 1, ksr ), vr( 1, & + ksi ), work, ldwork,work( n*n+n+1 ), eps3, smlnum, bignum,iinfo ) + if( iinfo>0 ) then + if( pair ) then + info = info + 2 + else + info = info + 1 + end if + ifailr( ksr ) = k + ifailr( ksi ) = k + else + ifailr( ksr ) = 0 + ifailr( ksi ) = 0 + end if + do i = kr + 1, n + vr( i, ksr ) = zero + end do + if( pair ) then + do i = kr + 1, n + vr( i, ksi ) = zero + end do + end if + end if + if( pair ) then + ksr = ksr + 2 + else + ksr = ksr + 1 + end if + end if + end do loop_120 + return + end subroutine stdlib_shsein + + !> SLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(sp) function stdlib_sla_porpvgrw( uplo, ncols, a, lda, af, ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + real(sp), intent(in) :: a(lda,*), af(ldaf,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(sp) :: amax, umax, rpvgrw + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_spotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( abs( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( abs( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= 0.0_sp ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_sla_porpvgrw = rpvgrw + end function stdlib_sla_porpvgrw + + !> SLAED3: finds the roots of the secular equation, as defined by the + !> values in D, W, and RHO, between 1 and K. It makes the + !> appropriate calls to SLAED4 and then updates the eigenvectors by + !> multiplying the matrix of eigenvectors of the pair of eigensystems + !> being combined by the matrix of eigenvectors of the K-by-K system + !> which is solved here. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_slaed3( k, n, n1, d, q, ldq, rho, dlamda, q2, indx,ctot, w, s, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldq, n, n1 + real(sp), intent(in) :: rho + ! Array Arguments + integer(ilp), intent(in) :: ctot(*), indx(*) + real(sp), intent(out) :: d(*), q(ldq,*), s(*) + real(sp), intent(inout) :: dlamda(*), w(*) + real(sp), intent(in) :: q2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, iq2, j, n12, n2, n23 + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( k<0 ) then + info = -1 + else if( n SLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense symmetric matrix + !> that has been reduced to tridiagonal form. SLAED1 handles + !> the case in which all eigenvalues and eigenvectors of a symmetric + !> tridiagonal matrix are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**Tu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLAED8. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine SLAED4 (as called by SLAED9). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_slaed7( icompq, n, qsiz, tlvls, curlvl, curpbm, d, q,ldq, indxq, rho, & + cutpnt, qstore, qptr, prmptr,perm, givptr, givcol, givnum, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, icompq, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(sp), intent(inout) :: d(*), givnum(2,*), q(ldq,*), qstore(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, ldq2, & + n1, n2, ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>1 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( icompq==1 .and. qsizcutpnt .or. n SLAEXC: swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + !> an upper quasi-triangular matrix T by an orthogonal similarity + !> transformation. + !> T must be in Schur canonical form, that is, block upper triangular + !> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + !> has its diagonal elements equal and its off-diagonal elements of + !> opposite sign. + + subroutine stdlib_slaexc( wantq, n, t, ldt, q, ldq, j1, n1, n2, work,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, ldq, ldt, n, n1, n2 + ! Array Arguments + real(sp), intent(inout) :: q(ldq,*), t(ldt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldd = 4 + integer(ilp), parameter :: ldx = 2 + + + + ! Local Scalars + integer(ilp) :: ierr, j2, j3, j4, k, nd + real(sp) :: cs, dnorm, eps, scale, smlnum, sn, t11, t22, t33, tau, tau1, tau2, temp, & + thresh, wi1, wi2, wr1, wr2, xnorm + ! Local Arrays + real(sp) :: d(ldd,4), u(3), u1(3), u2(3), x(ldx,2) + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 .or. n1==0 .or. n2==0 )return + if( j1+n1>n )return + j2 = j1 + 1 + j3 = j1 + 2 + j4 = j1 + 3 + if( n1==1 .and. n2==1 ) then + ! swap two 1-by-1 blocks. + t11 = t( j1, j1 ) + t22 = t( j2, j2 ) + ! determine the transformation to perform the interchange. + call stdlib_slartg( t( j1, j2 ), t22-t11, cs, sn, temp ) + ! apply transformation to the matrix t. + if( j3<=n )call stdlib_srot( n-j1-1, t( j1, j3 ), ldt, t( j2, j3 ), ldt, cs,sn ) + + call stdlib_srot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + t( j1, j1 ) = t22 + t( j2, j2 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_srot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + else + ! swapping involves at least one 2-by-2 block. + ! copy the diagonal block of order n1+n2 to the local array d + ! and compute its norm. + nd = n1 + n2 + call stdlib_slacpy( 'FULL', nd, nd, t( j1, j1 ), ldt, d, ldd ) + dnorm = stdlib_slange( 'MAX', nd, nd, d, ldd, work ) + ! compute machine-dependent threshold for test for accepting + ! swap. + eps = stdlib_slamch( 'P' ) + smlnum = stdlib_slamch( 'S' ) / eps + thresh = max( ten*eps*dnorm, smlnum ) + ! solve t11*x - x*t22 = scale*t12 for x. + call stdlib_slasy2( .false., .false., -1, n1, n2, d, ldd,d( n1+1, n1+1 ), ldd, d( 1,& + n1+1 ), ldd, scale, x,ldx, xnorm, ierr ) + ! swap the adjacent diagonal blocks. + k = n1 + n1 + n2 - 3 + go to ( 10, 20, 30 )k + 10 continue + ! n1 = 1, n2 = 2: generate elementary reflector h so that: + ! ( scale, x11, x12 ) h = ( 0, 0, * ) + u( 1 ) = scale + u( 2 ) = x( 1, 1 ) + u( 3 ) = x( 1, 2 ) + call stdlib_slarfg( 3, u( 3 ), u, 1, tau ) + u( 3 ) = one + t11 = t( j1, j1 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_slarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_slarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 3,3 )-t11 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_slarfx( 'L', 3, n-j1+1, u, tau, t( j1, j1 ), ldt, work ) + call stdlib_slarfx( 'R', j2, 3, u, tau, t( 1, j1 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j3, j3 ) = t11 + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_slarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 20 continue + ! n1 = 2, n2 = 1: generate elementary reflector h so that: + ! h ( -x11 ) = ( * ) + ! ( -x21 ) = ( 0 ) + ! ( scale ) = ( 0 ) + u( 1 ) = -x( 1, 1 ) + u( 2 ) = -x( 2, 1 ) + u( 3 ) = scale + call stdlib_slarfg( 3, u( 1 ), u( 2 ), 1, tau ) + u( 1 ) = one + t33 = t( j3, j3 ) + ! perform swap provisionally on diagonal block in d. + call stdlib_slarfx( 'L', 3, 3, u, tau, d, ldd, work ) + call stdlib_slarfx( 'R', 3, 3, u, tau, d, ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 2, 1 ) ), abs( d( 3, 1 ) ), abs( d( 1,1 )-t33 ) )>thresh )go to & + 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_slarfx( 'R', j3, 3, u, tau, t( 1, j1 ), ldt, work ) + call stdlib_slarfx( 'L', 3, n-j1, u, tau, t( j1, j2 ), ldt, work ) + t( j1, j1 ) = t33 + t( j2, j1 ) = zero + t( j3, j1 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_slarfx( 'R', n, 3, u, tau, q( 1, j1 ), ldq, work ) + end if + go to 40 + 30 continue + ! n1 = 2, n2 = 2: generate elementary reflectors h(1) and h(2) so + ! that: + ! h(2) h(1) ( -x11 -x12 ) = ( * * ) + ! ( -x21 -x22 ) ( 0 * ) + ! ( scale 0 ) ( 0 0 ) + ! ( 0 scale ) ( 0 0 ) + u1( 1 ) = -x( 1, 1 ) + u1( 2 ) = -x( 2, 1 ) + u1( 3 ) = scale + call stdlib_slarfg( 3, u1( 1 ), u1( 2 ), 1, tau1 ) + u1( 1 ) = one + temp = -tau1*( x( 1, 2 )+u1( 2 )*x( 2, 2 ) ) + u2( 1 ) = -temp*u1( 2 ) - x( 2, 2 ) + u2( 2 ) = -temp*u1( 3 ) + u2( 3 ) = scale + call stdlib_slarfg( 3, u2( 1 ), u2( 2 ), 1, tau2 ) + u2( 1 ) = one + ! perform swap provisionally on diagonal block in d. + call stdlib_slarfx( 'L', 3, 4, u1, tau1, d, ldd, work ) + call stdlib_slarfx( 'R', 4, 3, u1, tau1, d, ldd, work ) + call stdlib_slarfx( 'L', 3, 4, u2, tau2, d( 2, 1 ), ldd, work ) + call stdlib_slarfx( 'R', 4, 3, u2, tau2, d( 1, 2 ), ldd, work ) + ! test whether to reject swap. + if( max( abs( d( 3, 1 ) ), abs( d( 3, 2 ) ), abs( d( 4, 1 ) ),abs( d( 4, 2 ) ) )& + >thresh )go to 50 + ! accept swap: apply transformation to the entire matrix t. + call stdlib_slarfx( 'L', 3, n-j1+1, u1, tau1, t( j1, j1 ), ldt, work ) + call stdlib_slarfx( 'R', j4, 3, u1, tau1, t( 1, j1 ), ldt, work ) + call stdlib_slarfx( 'L', 3, n-j1+1, u2, tau2, t( j2, j1 ), ldt, work ) + call stdlib_slarfx( 'R', j4, 3, u2, tau2, t( 1, j2 ), ldt, work ) + t( j3, j1 ) = zero + t( j3, j2 ) = zero + t( j4, j1 ) = zero + t( j4, j2 ) = zero + if( wantq ) then + ! accumulate transformation in the matrix q. + call stdlib_slarfx( 'R', n, 3, u1, tau1, q( 1, j1 ), ldq, work ) + call stdlib_slarfx( 'R', n, 3, u2, tau2, q( 1, j2 ), ldq, work ) + end if + 40 continue + if( n2==2 ) then + ! standardize new 2-by-2 block t11 + call stdlib_slanv2( t( j1, j1 ), t( j1, j2 ), t( j2, j1 ),t( j2, j2 ), wr1, wi1, & + wr2, wi2, cs, sn ) + call stdlib_srot( n-j1-1, t( j1, j1+2 ), ldt, t( j2, j1+2 ), ldt,cs, sn ) + call stdlib_srot( j1-1, t( 1, j1 ), 1, t( 1, j2 ), 1, cs, sn ) + if( wantq )call stdlib_srot( n, q( 1, j1 ), 1, q( 1, j2 ), 1, cs, sn ) + end if + if( n1==2 ) then + ! standardize new 2-by-2 block t22 + j3 = j1 + n2 + j4 = j3 + 1 + call stdlib_slanv2( t( j3, j3 ), t( j3, j4 ), t( j4, j3 ),t( j4, j4 ), wr1, wi1, & + wr2, wi2, cs, sn ) + if( j3+2<=n )call stdlib_srot( n-j3-1, t( j3, j3+2 ), ldt, t( j4, j3+2 ),ldt, cs,& + sn ) + call stdlib_srot( j3-1, t( 1, j3 ), 1, t( 1, j4 ), 1, cs, sn ) + if( wantq )call stdlib_srot( n, q( 1, j3 ), 1, q( 1, j4 ), 1, cs, sn ) + end if + end if + return + ! exit with info = 1 if swap was rejected. + 50 info = 1 + return + end subroutine stdlib_slaexc + + !> SLAHQR: is an auxiliary routine called by SHSEQR to update the + !> eigenvalues and Schur decomposition already computed by SHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: wi(*), wr(*) + ! ========================================================= + ! Parameters + real(sp), parameter :: dat1 = 3.0_sp/4.0_sp + real(sp), parameter :: dat2 = -0.4375_sp + integer(ilp), parameter :: kexsh = 10 + + + + ! Local Scalars + real(sp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & + rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 + integer(ilp) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl + ! Local Arrays + real(sp) :: v(3) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + wr( ilo ) = h( ilo, ilo ) + wi( ilo ) = zero + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = zero + h( j+3, j ) = zero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=sp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1 or 2. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 20 continue + l = ilo + if( i=ilo )tst = tst + abs( h( k-1, k-2 ) ) + if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some cases. ==== + if( abs( h( k, k-1 ) )<=ulp*tst ) then + ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) + aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 + end if + end do + 40 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = zero + end if + ! exit from loop if a submatrix of order 1 or 2 has split off. + if( l>=i-1 )go to 150 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + h11 = dat1*s + h( i, i ) + h12 = dat2*s + h21 = s + h22 = h11 + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) + h11 = dat1*s + h( l, l ) + h12 = dat2*s + h21 = s + h22 = h11 + else + ! prepare to use francis' double shift + ! (i.e. 2nd degree generalized rayleigh quotient) + h11 = h( i-1, i-1 ) + h21 = h( i, i-1 ) + h12 = h( i-1, i ) + h22 = h( i, i ) + end if + s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) + if( s==zero ) then + rt1r = zero + rt1i = zero + rt2r = zero + rt2i = zero + else + h11 = h11 / s + h21 = h21 / s + h12 = h12 / s + h22 = h22 / s + tr = ( h11+h22 ) / two + det = ( h11-tr )*( h22-tr ) - h12*h21 + rtdisc = sqrt( abs( det ) ) + if( det>=zero ) then + ! ==== complex conjugate shifts ==== + rt1r = tr*s + rt2r = rt1r + rt1i = rtdisc*s + rt2i = -rt1i + else + ! ==== realshifts (use only one of them,KIND=sp) ==== + rt1r = tr + rtdisc + rt2r = tr - rtdisc + if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then + rt1r = rt1r*s + rt2r = rt1r + else + rt2r = rt2r*s + rt1r = rt2r + end if + rt1i = zero + rt2i = zero + end if + end if + ! look for two consecutive small subdiagonal elements. + do m = i - 2, l, -1 + ! determine the effect of starting the double-shift qr + ! iteration at row m, and see if this would make h(m,m-1) + ! negligible. (the following uses scaling to avoid + ! overflows and most underflows.) + h21s = h( m+1, m ) + s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) + h21s = h( m+1, m ) / s + v( 1 ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & + rt1i*( rt2i / s ) + v( 2 ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) + v( 3 ) = h21s*h( m+2, m+1 ) + s = abs( v( 1 ) ) + abs( v( 2 ) ) + abs( v( 3 ) ) + v( 1 ) = v( 1 ) / s + v( 2 ) = v( 2 ) / s + v( 3 ) = v( 3 ) / s + if( m==l )go to 60 + if( abs( h( m, m-1 ) )*( abs( v( 2 ) )+abs( v( 3 ) ) )<=ulp*abs( v( 1 ) )*( abs( & + h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 + end do + 60 continue + ! double-shift qr step + loop_130: do k = m, i - 1 + ! the first iteration of this loop determines a reflection g + ! from the vector v and applies it from left and right to h, + ! thus creating a nonzero bulge below the subdiagonal. + ! each subsequent iteration determines a reflection g to + ! restore the hessenberg form in the (k-1)th column, and thus + ! chases the bulge one step toward the bottom of the active + ! submatrix. nr is the order of g. + nr = min( 3, i-k+1 ) + if( k>m )call stdlib_scopy( nr, h( k, k-1 ), 1, v, 1 ) + call stdlib_slarfg( nr, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = zero + if( kl ) then + ! ==== use the following instead of + ! . h( k, k-1 ) = -h( k, k-1 ) to + ! . avoid a bug when v(2) and v(3) + ! . underflow. ==== + h( k, k-1 ) = h( k, k-1 )*( one-t1 ) + end if + v2 = v( 2 ) + t2 = t1*v2 + if( nr==3 ) then + v3 = v( 3 ) + t3 = t1*v3 + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + h( k+2, j ) = h( k+2, j ) - sum*t3 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, min( k+3, i ) + sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + h( j, k+2 ) = h( j, k+2 ) - sum*t3 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + z( j, k+2 ) = z( j, k+2 ) - sum*t3 + end do + end if + else if( nr==2 ) then + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = h( k, j ) + v2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum*t1 + h( k+1, j ) = h( k+1, j ) - sum*t2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+3,i). + do j = i1, i + sum = h( j, k ) + v2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum*t1 + h( j, k+1 ) = h( j, k+1 ) - sum*t2 + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = z( j, k ) + v2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum*t1 + z( j, k+1 ) = z( j, k+1 ) - sum*t2 + end do + end if + end if + end do loop_130 + end do loop_140 + ! failure to converge in remaining number of iterations + info = i + return + 150 continue + if( l==i ) then + ! h(i,i-1) is negligible: one eigenvalue has converged. + wr( i ) = h( i, i ) + wi( i ) = zero + else if( l==i-1 ) then + ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. + ! transform the 2-by-2 submatrix to standard schur form, + ! and compute and store the eigenvalues. + call stdlib_slanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & + wi( i-1 ), wr( i ), wi( i ),cs, sn ) + if( wantt ) then + ! apply the transformation to the rest of h. + if( i2>i )call stdlib_srot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) + + call stdlib_srot( i-i1-1, h( i1, i-1 ), 1, h( i1, i ), 1, cs, sn ) + end if + if( wantz ) then + ! apply the transformation to z. + call stdlib_srot( nz, z( iloz, i-1 ), 1, z( iloz, i ), 1, cs, sn ) + end if + end if + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 20 + 160 continue + return + end subroutine stdlib_slahqr + + !> SLASD2: merges the two sets of singular values together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> singular values are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + !> SLASD2 is called from SLASD1. + + pure subroutine stdlib_slasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & + u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre + real(sp), intent(in) :: alpha, beta + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) + integer(ilp), intent(inout) :: idxq(*) + real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(sp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) + ! ===================================================================== + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 + real(sp) :: c, eps, hlftol, s, tau, tol, z1 + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre/=1 ) .and. ( sqre/=0 ) ) then + info = -3 + end if + n = nl + nr + 1 + m = n + sqre + if( ldun )go to 110 + if( abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + idxp( k2 ) = j + coltyp( j ) = 4 + else + ! check if singular values are close enough to allow deflation. + if( abs( d( j )-d( jprev ) )<=tol ) then + ! deflation is possible. + s = z( jprev ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_slapy2( c, s ) + c = c / tau + s = -s / tau + z( j ) = tau + z( jprev ) = zero + ! apply back the givens rotation to the left and right + ! singular vector matrices. + idxjp = idxq( idx( jprev )+1 ) + idxj = idxq( idx( j )+1 ) + if( idxjp<=nlp1 ) then + idxjp = idxjp - 1 + end if + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_srot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s ) + call stdlib_srot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,s ) + if( coltyp( j )/=coltyp( jprev ) ) then + coltyp( j ) = 3 + end if + coltyp( jprev ) = 4 + k2 = k2 - 1 + idxp( k2 ) = jprev + jprev = j + else + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + jprev = j + end if + end if + go to 100 + 110 continue + ! record the last singular value. + k = k + 1 + u2( k, 1 ) = z( jprev ) + dsigma( k ) = d( jprev ) + idxp( k ) = jprev + 120 continue + ! count up the total number of the various types of columns, then + ! form a permutation which positions the four column types into + ! four groups of uniform structure (although one or more of these + ! groups may be empty). + do j = 1, 4 + ctot( j ) = 0 + end do + do j = 2, n + ct = coltyp( j ) + ctot( ct ) = ctot( ct ) + 1 + end do + ! psm(*) = position in submatrix (of types 1 through 4) + psm( 1 ) = 2 + psm( 2 ) = 2 + ctot( 1 ) + psm( 3 ) = psm( 2 ) + ctot( 2 ) + psm( 4 ) = psm( 3 ) + ctot( 3 ) + ! fill out the idxc array so that the permutation which it induces + ! will place all type-1 columns first, all type-2 columns next, + ! then all type-3's, and finally all type-4's, starting from the + ! second column. this applies similarly to the rows of vt. + do j = 2, n + jp = idxp( j ) + ct = coltyp( jp ) + idxc( psm( ct ) ) = j + psm( ct ) = psm( ct ) + 1 + end do + ! sort the singular values and corresponding singular vectors into + ! dsigma, u2, and vt2 respectively. the singular values/vectors + ! which were not deflated go into the first k slots of dsigma, u2, + ! and vt2 respectively, while those which were deflated go into the + ! last n - k slots, except that the first column/row will be treated + ! separately. + do j = 2, n + jp = idxp( j ) + dsigma( j ) = d( jp ) + idxj = idxq( idx( idxp( idxc( j ) ) )+1 ) + if( idxj<=nlp1 ) then + idxj = idxj - 1 + end if + call stdlib_scopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 ) + call stdlib_scopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 ) + end do + ! determine dsigma(1), dsigma(2) and z(1) + dsigma( 1 ) = zero + hlftol = tol / two + if( abs( dsigma( 2 ) )<=hlftol )dsigma( 2 ) = hlftol + if( m>n ) then + z( 1 ) = stdlib_slapy2( z1, z( m ) ) + if( z( 1 )<=tol ) then + c = one + s = zero + z( 1 ) = tol + else + c = z1 / z( 1 ) + s = z( m ) / z( 1 ) + end if + else + if( abs( z1 )<=tol ) then + z( 1 ) = tol + else + z( 1 ) = z1 + end if + end if + ! move the rest of the updating row to z. + call stdlib_scopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 ) + ! determine the first column of u2, the first row of vt2 and the + ! last row of vt. + call stdlib_slaset( 'A', n, 1, zero, zero, u2, ldu2 ) + u2( nlp1, 1 ) = one + if( m>n ) then + do i = 1, nlp1 + vt( m, i ) = -s*vt( nlp1, i ) + vt2( 1, i ) = c*vt( nlp1, i ) + end do + do i = nlp2, m + vt2( 1, i ) = s*vt( m, i ) + vt( m, i ) = c*vt( m, i ) + end do + else + call stdlib_scopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 ) + end if + if( m>n ) then + call stdlib_scopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 ) + end if + ! the deflated singular values and their corresponding vectors go + ! into the back of d, u, and v respectively. + if( n>k ) then + call stdlib_scopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 ) + call stdlib_slacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),ldu ) + call stdlib_slacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),ldvt ) + end if + ! copy ctot into coltyp for referencing in stdlib_slasd3. + do j = 1, 4 + coltyp( j ) = ctot( j ) + end do + return + end subroutine stdlib_slasd2 + + !> SLASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a real M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + + pure subroutine stdlib_slaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, lwork, ldt + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. nm .and. m>0 )) then + info = -3 + else if( nb<=0 ) then + info = -4 + else if( lda=n).or.(nb<=m).or.(nb>=n)) then + call stdlib_sgelqt( m, n, mb, a, lda, t, ldt, work, info) + return + end if + kk = mod((n-m),(nb-m)) + ii=n-kk+1 + ! compute the lq factorization of the first block a(1:m,1:nb) + call stdlib_sgelqt( m, nb, mb, a(1,1), lda, t, ldt, work, info) + ctr = 1 + do i = nb+1, ii-nb+m , (nb-m) + ! compute the qr factorization of the current block a(1:m,i:i+nb-m) + call stdlib_stplqt( m, nb-m, 0, mb, a(1,1), lda, a( 1, i ),lda, t(1, ctr * m + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(1:m,ii:n) + if (ii<=n) then + call stdlib_stplqt( m, kk, 0, mb, a(1,1), lda, a( 1, ii ),lda, t(1, ctr * m + 1), & + ldt,work, info ) + end if + work( 1 ) = m * mb + return + end subroutine stdlib_slaswlq + + !> SLATSQR: computes a blocked Tall-Skinny QR factorization of + !> a real M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + + pure subroutine stdlib_slatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, ldt, lwork + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. mn .and. n>0 )) then + info = -4 + else if( lda=m)) then + call stdlib_sgeqrt( m, n, nb, a, lda, t, ldt, work, info) + return + end if + kk = mod((m-n),(mb-n)) + ii=m-kk+1 + ! compute the qr factorization of the first block a(1:mb,1:n) + call stdlib_sgeqrt( mb, n, nb, a(1,1), lda, t, ldt, work, info ) + ctr = 1 + do i = mb+1, ii-mb+n , (mb-n) + ! compute the qr factorization of the current block a(i:i+mb-n,1:n) + call stdlib_stpqrt( mb-n, n, 0, nb, a(1,1), lda, a( i, 1 ), lda,t(1, ctr * n + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(ii:m,1:n) + if (ii<=m) then + call stdlib_stpqrt( kk, n, 0, nb, a(1,1), lda, a( ii, 1 ), lda,t(1, ctr * n + 1), & + ldt,work, info ) + end if + work( 1 ) = n*nb + return + end subroutine stdlib_slatsqr + + !> SORGBR: generates one of the real orthogonal matrices Q or P**T + !> determined by SGEBRD when reducing a real matrix A to bidiagonal + !> form: A = Q * B * P**T. Q and P**T are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + !> is of order N: + !> if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m + !> rows of P**T, where n >= m >= k; + !> if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as + !> an N-by-N matrix. + + pure subroutine stdlib_sorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantq + integer(ilp) :: i, iinfo, j, lwkopt, mn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + wantq = stdlib_lsame( vect, 'Q' ) + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 .or. ( wantq .and. ( n>m .or. nn .or. m=k ) then + call stdlib_sorgqr( m, n, k, a, lda, tau, work, -1, iinfo ) + else + if( m>1 ) then + call stdlib_sorgqr( m-1, m-1, m-1, a, lda, tau, work, -1,iinfo ) + end if + end if + else + if( k1 ) then + call stdlib_sorglq( n-1, n-1, n-1, a, lda, tau, work, -1,iinfo ) + end if + end if + end if + lwkopt = work( 1 ) + lwkopt = max (lwkopt, mn) + end if + if( info/=0 ) then + call stdlib_xerbla( 'SORGBR', -info ) + return + else if( lquery ) then + work( 1 ) = lwkopt + return + end if + ! quick return if possible + if( m==0 .or. n==0 ) then + work( 1 ) = 1 + return + end if + if( wantq ) then + ! form q, determined by a call to stdlib_sgebrd to reduce an m-by-k + ! matrix + if( m>=k ) then + ! if m >= k, assume m >= n >= k + call stdlib_sorgqr( m, n, k, a, lda, tau, work, lwork, iinfo ) + else + ! if m < k, assume m = n + ! shift the vectors which define the elementary reflectors one + ! column to the right, and set the first row and column of q + ! to those of the unit matrix + do j = m, 2, -1 + a( 1, j ) = zero + do i = j + 1, m + a( i, j ) = a( i, j-1 ) + end do + end do + a( 1, 1 ) = one + do i = 2, m + a( i, 1 ) = zero + end do + if( m>1 ) then + ! form q(2:m,2:m) + call stdlib_sorgqr( m-1, m-1, m-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + else + ! form p**t, determined by a call to stdlib_sgebrd to reduce a k-by-n + ! matrix + if( k= n, assume m = n + ! shift the vectors which define the elementary reflectors one + ! row downward, and set the first row and column of p**t to + ! those of the unit matrix + a( 1, 1 ) = one + do i = 2, n + a( i, 1 ) = zero + end do + do j = 2, n + do i = j - 1, 2, -1 + a( i, j ) = a( i-1, j ) + end do + a( 1, j ) = zero + end do + if( n>1 ) then + ! form p**t(2:n,2:n) + call stdlib_sorglq( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sorgbr + + !> If VECT = 'Q', SORMBR: overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**T * C C * Q**T + !> If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'T': P**T * C C * P**T + !> Here Q and P**T are the orthogonal matrices determined by SGEBRD when + !> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + !> P**T are defined as products of elementary reflectors H(i) and G(i) + !> respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the orthogonal matrix Q or P**T that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + + pure subroutine stdlib_sormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), c(ldc,*) + real(sp), intent(in) :: tau(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: applyq, left, lquery, notran + character :: transt + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + applyq = stdlib_lsame( vect, 'Q' ) + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q or p and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( k<0 ) then + info = -6 + else if( ( applyq .and. lda=k ) then + ! q was determined by a call to stdlib_sgebrd with nq >= k + call stdlib_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & + ) + else if( nq>1 ) then + ! q was determined by a call to stdlib_sgebrd with nq < k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_sormqr( side, trans, mi, ni, nq-1, a( 2, 1 ), lda, tau,c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + else + ! apply p + if( notran ) then + transt = 'T' + else + transt = 'N' + end if + if( nq>k ) then + ! p was determined by a call to stdlib_sgebrd with nq > k + call stdlib_sormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & + iinfo ) + else if( nq>1 ) then + ! p was determined by a call to stdlib_sgebrd with nq <= k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_sormlq( side, transt, mi, ni, nq-1, a( 1, 2 ), lda,tau, c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_sormbr + + !> SPBSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_spbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab SPBSVX: uses the Cholesky factorization A = U**T*U or A = L*L**T to + !> compute the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_spbsvx( fact, uplo, n, kd, nrhs, ab, ldab, afb, ldafb,equed, s, b, ldb, x, & + ldx, rcond, ferr, berr,work, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*), s(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ, upper + integer(ilp) :: i, infequ, j, j1, j2 + real(sp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + upper = stdlib_lsame( uplo, 'U' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_slansb( '1', uplo, n, kd, ab, ldab, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_spbcon( uplo, n, kd, afb, ldafb, anorm, rcond, work, iwork,info ) + ! compute the solution matrix x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_spbtrs( uplo, n, kd, nrhs, afb, ldafb, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_spbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b, ldb, x,ldx, ferr, berr,& + work, iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond SPFTRF: computes the Cholesky factorization of a real symmetric + !> positive definite matrix A. + !> The factorization has the form + !> A = U**T * U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_spftrf( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + real(sp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'T' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SPFTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_spotrf( 'L', n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_strsm( 'R', 'L', 'T', 'N', n2, n1, one, a( 0 ), n,a( n1 ), n ) + + call stdlib_ssyrk( 'U', 'N', n2, n1, -one, a( n1 ), n, one,a( n ), n ) + call stdlib_spotrf( 'U', n2, a( n ), n, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_spotrf( 'L', n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_strsm( 'L', 'L', 'N', 'N', n1, n2, one, a( n2 ), n,a( 0 ), n ) + + call stdlib_ssyrk( 'U', 'T', n2, n1, -one, a( 0 ), n, one,a( n1 ), n ) + call stdlib_spotrf( 'U', n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + end if + else + ! n is odd and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + call stdlib_spotrf( 'U', n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_strsm( 'L', 'U', 'T', 'N', n1, n2, one, a( 0 ), n1,a( n1*n1 ), n1 & + ) + call stdlib_ssyrk( 'L', 'T', n2, n1, -one, a( n1*n1 ), n1, one,a( 1 ), n1 ) + + call stdlib_spotrf( 'L', n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + call stdlib_spotrf( 'U', n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_strsm( 'R', 'U', 'N', 'N', n2, n1, one, a( n2*n2 ),n2, a( 0 ), n2 & + ) + call stdlib_ssyrk( 'L', 'N', n2, n1, -one, a( 0 ), n2, one,a( n1*n2 ), n2 ) + + call stdlib_spotrf( 'L', n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_spotrf( 'L', k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_strsm( 'R', 'L', 'T', 'N', k, k, one, a( 1 ), n+1,a( k+1 ), n+1 ) + + call stdlib_ssyrk( 'U', 'N', k, k, -one, a( k+1 ), n+1, one,a( 0 ), n+1 ) + + call stdlib_spotrf( 'U', k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_spotrf( 'L', k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_strsm( 'L', 'L', 'N', 'N', k, k, one, a( k+1 ),n+1, a( 0 ), n+1 ) + + call stdlib_ssyrk( 'U', 'T', k, k, -one, a( 0 ), n+1, one,a( k ), n+1 ) + + call stdlib_spotrf( 'U', k, a( k ), n+1, info ) + if( info>0 )info = info + k + end if + else + ! n is even and transr = 't' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_spotrf( 'U', k, a( 0+k ), k, info ) + if( info>0 )return + call stdlib_strsm( 'L', 'U', 'T', 'N', k, k, one, a( k ), n1,a( k*( k+1 ) ), & + k ) + call stdlib_ssyrk( 'L', 'T', k, k, -one, a( k*( k+1 ) ), k, one,a( 0 ), k ) + + call stdlib_spotrf( 'L', k, a( 0 ), k, info ) + if( info>0 )info = info + k + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_spotrf( 'U', k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_strsm( 'R', 'U', 'N', 'N', k, k, one,a( k*( k+1 ) ), k, a( 0 ), k & + ) + call stdlib_ssyrk( 'L', 'N', k, k, -one, a( 0 ), k, one,a( k*k ), k ) + call stdlib_spotrf( 'L', k, a( k*k ), k, info ) + if( info>0 )info = info + k + end if + end if + end if + return + end subroutine stdlib_spftrf + + !> SPOSV: computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**T* U, if UPLO = 'U', or + !> A = L * L**T, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_sposv( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda SPOSVX: uses the Cholesky factorization A = U**T*U or A = L*L**T to + !> compute the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_sposvx( fact, uplo, n, nrhs, a, lda, af, ldaf, equed,s, b, ldb, x, ldx, & + rcond, ferr, berr, work,iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(sp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*), s(*) + real(sp), intent(out) :: berr(*), ferr(*), work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(sp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_slamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_slansy( '1', uplo, n, a, lda, work ) + ! compute the reciprocal of the condition number of a. + call stdlib_spocon( uplo, n, af, ldaf, anorm, rcond, work, iwork, info ) + ! compute the solution matrix x. + call stdlib_slacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_spotrs( uplo, n, nrhs, af, ldaf, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_sporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x, ldx,ferr, berr, work, & + iwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond STREXC: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + !> moved to row ILST. + !> The real Schur form T is reordered by an orthogonal similarity + !> transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + !> is updated by postmultiplying it with Z. + !> T must be in Schur canonical form (as returned by SHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_strexc( compq, n, t, ldt, q, ldq, ifst, ilst, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(inout) :: ifst, ilst + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldt, n + ! Array Arguments + real(sp), intent(inout) :: q(ldq,*), t(ldt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: here, nbf, nbl, nbnext + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test the input arguments. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.wantq .and. .not.stdlib_lsame( compq, 'N' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'STREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + ! determine the first row of specified block + ! and find out it is 1 by 1 or 2 by 2. + if( ifst>1 ) then + if( t( ifst, ifst-1 )/=zero )ifst = ifst - 1 + end if + nbf = 1 + if( ifst1 ) then + if( t( ilst, ilst-1 )/=zero )ilst = ilst - 1 + end if + nbl = 1 + if( ilst=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,nbf, work, & + info ) + if( info/=0 ) then + ilst = here + return + end if + here = here - nbnext + ! test if 2 by 2 block breaks into two 1 by 1 blocks + if( nbf==2 ) then + if( t( here+1, here )==zero )nbf = 3 + end if + else + ! current block consists of two 1 by 1 blocks each of which + ! must be swapped individually + nbnext = 1 + if( here>=3 ) then + if( t( here-1, here-2 )/=zero )nbnext = 2 + end if + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here-nbnext, nbnext,1, work, info ) + + if( info/=0 ) then + ilst = here + return + end if + if( nbnext==1 ) then + ! swap two 1 by 1 blocks, no problems possible + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here, nbnext, 1,work, info ) + + here = here - 1 + else + ! recompute nbnext in case 2 by 2 split + if( t( here, here-1 )==zero )nbnext = 1 + if( nbnext==2 ) then + ! 2 by 2 block did not split + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here-1, 2, 1,work, info ) + + if( info/=0 ) then + ilst = here + return + end if + here = here - 2 + else + ! 2 by 2 block did split + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here, 1, 1,work, info ) + + call stdlib_slaexc( wantq, n, t, ldt, q, ldq, here-1, 1, 1,work, info ) + + here = here - 2 + end if + end if + end if + if( here>ilst )go to 20 + end if + ilst = here + return + end subroutine stdlib_strexc + + !> STRSEN: reorders the real Schur factorization of a real matrix + !> A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in + !> the leading diagonal blocks of the upper quasi-triangular matrix T, + !> and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + !> T must be in Schur canonical form (as returned by SHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_strsen( job, compq, select, n, t, ldt, q, ldq, wr, wi,m, s, sep, work, & + lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, liwork, lwork, n + real(sp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: q(ldq,*), t(ldt,*) + real(sp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, pair, swap, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, kk, ks, liwmin, lwmin, n1, n2, nn + real(sp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt STRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a real upper + !> quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q + !> orthogonal). + !> T must be in Schur canonical form (as returned by SHSEQR), that is, + !> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + !> 2-by-2 diagonal block has its diagonal elements equal and its + !> off-diagonal elements of opposite sign. + + subroutine stdlib_strsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm, m, & + work, ldwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(out) :: s(*), sep(*), work(ldwork,*) + real(sp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: pair, somcon, wantbh, wants, wantsp + integer(ilp) :: i, ierr, ifst, ilst, j, k, kase, ks, n2, nn + real(sp) :: bignum, cond, cs, delta, dumm, eps, est, lnrm, mu, prod, prod1, prod2, & + rnrm, scale, smlnum, sn + ! Local Arrays + integer(ilp) :: isave(3) + real(sp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt SGELQ: computes an LQ factorization of a real M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_sgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'SGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'SGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if ( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_sgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_slaswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_sgelq + + !> SGELSY: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by orthogonal transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**T [ inv(T11)*Q1**T*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + + subroutine stdlib_sgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & + nb3, nb4 + real(sp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & + wsize + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_slange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_sgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, info ) + + wsize = mn + work( mn+1 ) + ! workspace: mn+2*n+nb*(n+1). + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = one + work( ismax ) = one + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank SGEQR: computes a QR factorization of a real M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_sgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'SGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'SGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if ( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_sgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_slatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_sgeqr + + !> SGETSLS: solves overdetermined or underdetermined real linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'T' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_sgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(sp) :: anrm, bignum, bnrm, smlnum, tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'T' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_sgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_sgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_sgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_sgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_slaset( 'F', maxmn, nrhs, zero, zero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_slange( 'M', brow, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_sgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_sgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_strtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_strtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = zero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_sgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_sgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_strtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = zero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_sgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_sgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_slascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_slascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_slascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_slascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=sp) + return + end subroutine stdlib_sgetsls + + !> SGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a complex M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in SGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of SGEQRT for more details on the format. + + pure subroutine stdlib_sgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m SLAED2: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny entry in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_slaed2( k, n, n1, d, q, ldq, indxq, rho, z, dlamda, w,q2, indx, indxc,& + indxp, coltyp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, k + integer(ilp), intent(in) :: ldq, n, n1 + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: coltyp(*), indx(*), indxc(*), indxp(*) + integer(ilp), intent(inout) :: indxq(*) + real(sp), intent(inout) :: d(*), q(ldq,*), z(*) + real(sp), intent(out) :: dlamda(*), q2(*), w(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: mone = -1.0_sp + + ! Local Arrays + integer(ilp) :: ctot(4), psm(4) + ! Local Scalars + integer(ilp) :: ct, i, imax, iq1, iq2, j, jmax, js, k2, n1p1, n2, nj, pj + real(sp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( ldqn1 .or. ( n / 2 )n )go to 100 + if( rho*abs( z( nj ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + coltyp( nj ) = 4 + indxp( k2 ) = nj + else + ! check if eigenvalues are close enough to allow deflation. + s = z( pj ) + c = z( nj ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_slapy2( c, s ) + t = d( nj ) - d( pj ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( nj ) = tau + z( pj ) = zero + if( coltyp( nj )/=coltyp( pj ) )coltyp( nj ) = 2 + coltyp( pj ) = 4 + call stdlib_srot( n, q( 1, pj ), 1, q( 1, nj ), 1, c, s ) + t = d( pj )*c**2 + d( nj )*s**2 + d( nj ) = d( pj )*s**2 + d( nj )*c**2 + d( pj ) = t + k2 = k2 - 1 + i = 1 + 90 continue + if( k2+i<=n ) then + if( d( pj ) SLAQR2: is identical to SLAQR3 except that it avoids + !> recursion by calling SLAHQR instead of SLAQR4. + !> Aggressive early deflation: + !> This subroutine accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwkopt + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,int,max,min,real,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_sgehrd ==== + call stdlib_sgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_sormhr ==== + call stdlib_sormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=sp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_slaset( 'A', jw, jw, zero, one, v, ldv ) + call stdlib_slahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, jw, & + v, ldv, infqr ) + ! ==== stdlib_strexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not.bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_strexc can not fail in this case.) ==== + ifst = ns + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_strexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_scopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_slarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_slaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_slarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_slarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_slarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_sgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_scopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_sormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=sp) + end subroutine stdlib_slaqr2 + + !> SLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B, + !> where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. + !> A related subroutine SLASD7 handles the case in which the singular + !> values (and the singular vectors in factored form) are desired. + !> SLASD1 computes the SVD as follows: + !> ( D1(in) 0 0 0 ) + !> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) + !> ( 0 0 D2(in) 0 ) + !> = U(out) * ( D(out) 0) * VT(out) + !> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M + !> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + !> elsewhere; and the entry b is empty if SQRE = 0. + !> The left singular vectors of the original matrix are stored in U, and + !> the transpose of the right singular vectors are stored in VT, and the + !> singular values are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple singular values or when there are zeros in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLASD2. + !> The second stage consists of calculating the updated + !> singular values. This is done by finding the square roots of the + !> roots of the secular equation via the routine SLASD4 (as called + !> by SLASD3). This routine also calculates the singular vectors of + !> the current problem. + !> The final stage consists of computing the updated singular vectors + !> directly using the updated singular values. The singular vectors + !> for the current problem are multiplied with the singular vectors + !> from the overall problem. + + pure subroutine stdlib_slasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, nl, nr, sqre + real(sp), intent(inout) :: alpha, beta + ! Array Arguments + integer(ilp), intent(inout) :: idxq(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & + ldvt2, m, n, n1, n2 + real(sp) :: orgnrm + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + if( nl<1 ) then + info = -1 + else if( nr<1 ) then + info = -2 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLASD1', -info ) + return + end if + n = nl + nr + 1 + m = n + sqre + ! the following values are for bookkeeping purposes only. they are + ! integer pointers which indicate the portion of the workspace + ! used by a particular array in stdlib_slasd2 and stdlib_slasd3. + ldu2 = n + ldvt2 = m + iz = 1 + isigma = iz + m + iu2 = isigma + n + ivt2 = iu2 + ldu2*n + iq = ivt2 + ldvt2*m + idx = 1 + idxc = idx + n + coltyp = idxc + n + idxp = coltyp + n + ! scale. + orgnrm = max( abs( alpha ), abs( beta ) ) + d( nl+1 ) = zero + do i = 1, n + if( abs( d( i ) )>orgnrm ) then + orgnrm = abs( d( i ) ) + end if + end do + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + alpha = alpha / orgnrm + beta = beta / orgnrm + ! deflate singular values. + call stdlib_slasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& + isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & + idxc ), idxq, iwork( coltyp ), info ) + ! solve secular equation and update singular vectors. + ldq = k + call stdlib_slasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & + iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& + info ) + ! report the possible convergence failure. + if( info/=0 ) then + return + end if + ! unscale. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + ! prepare the idxq sorting permutation. + n1 = k + n2 = n - k + call stdlib_slamrg( n1, n2, d, 1, -1, idxq ) + return + end subroutine stdlib_slasd1 + + !> SLAED1: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles + !> the case in which eigenvalues only or eigenvalues and eigenvectors + !> of a full symmetric matrix (which was reduced to tridiagonal form) + !> are desired. + !> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) + !> where Z = Q**T*u, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine SLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine SLAED4 (as called by SLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_slaed1( n, d, q, ldq, indxq, rho, cutpnt, work, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, n + integer(ilp), intent(out) :: info + real(sp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: indxq(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), q(ldq,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, cpp1, i, idlmda, indx, indxc, indxp, iq2, is, iw, iz, k, n1, & + n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ldqcutpnt .or. ( n / 2 ) SLAED0: computes all eigenvalues and corresponding eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + + pure subroutine stdlib_slaed0( icompq, qsiz, n, d, e, q, ldq, qstore, ldqs,work, iwork, info & + ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldq, ldqs, n, qsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*), q(ldq,*) + real(sp), intent(out) :: qstore(ldqs,*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, subpbs, & + tlvls + real(sp) :: temp + ! Intrinsic Functions + intrinsic :: abs,int,log,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + if( icompq<0 .or. icompq>2 ) then + info = -1 + else if( ( icompq==1 ) .and. ( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + if( icompq/=2 ) then + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=sp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + loop_90: do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. + ! stdlib_slaed1 is used only for the full eigensystem of a tridiagonal + ! matrix. + ! stdlib_slaed7 handles the cases in which eigenvalues only or eigenvalues + ! and eigenvectors of a full symmetric matrix (which was reduced to + ! tridiagonal form) are desired. + if( icompq==2 ) then + call stdlib_slaed1( matsiz, d( submat ), q( submat, submat ),ldq, iwork( & + indxq+submat ),e( submat+msd2-1 ), msd2, work,iwork( subpbs+1 ), info ) + + else + call stdlib_slaed7( icompq, matsiz, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,iwork( indxq+submat ), e( submat+msd2-1 ),msd2, & + work( iq ), iwork( iqptr ),iwork( iprmpt ), iwork( iperm ),iwork( igivpt ), & + iwork( igivcl ),work( igivnm ), work( iwrem ),iwork( subpbs+1 ), info ) + + end if + if( info/=0 )go to 130 + iwork( i / 2+1 ) = iwork( i+2 ) + end do loop_90 + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + if( icompq==1 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_scopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_scopy( n, work, 1, d, 1 ) + else if( icompq==2 ) then + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + call stdlib_scopy( n, q( 1, j ), 1, work( n*i+1 ), 1 ) + end do + call stdlib_scopy( n, work, 1, d, 1 ) + call stdlib_slacpy( 'A', n, n, work( n+1 ), n, q, ldq ) + else + do i = 1, n + j = iwork( indxq+i ) + work( i ) = d( j ) + end do + call stdlib_scopy( n, work, 1, d, 1 ) + end if + go to 140 + 130 continue + info = submat*( n+1 ) + submat + matsiz - 1 + 140 continue + return + end subroutine stdlib_slaed0 + + !> SSTEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band real symmetric matrix can also be + !> found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See SLAED3 for details. + + pure subroutine stdlib_sstedc( compz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*), z(ldz,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: finish, i, icompz, ii, j, k, lgn, liwmin, lwmin, m, smlsiz, start, & + storez, strtrw + real(sp) :: eps, orgnrm, p, tiny + ! Intrinsic Functions + intrinsic :: abs,int,log,max,mod,real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or.( icompz>0 .and. ldztiny ) then + finish = finish + 1 + go to 20 + end if + end if + ! (sub) problem determined. compute its size and solve it. + m = finish - start + 1 + if( m==1 ) then + start = finish + 1 + go to 10 + end if + if( m>smlsiz ) then + ! scale. + orgnrm = stdlib_slanst( 'M', m, d( start ), e( start ) ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, m, 1, d( start ), m,info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, m-1, 1, e( start ),m-1, info ) + + if( icompz==1 ) then + strtrw = 1 + else + strtrw = start + end if + call stdlib_slaed0( icompz, n, m, d( start ), e( start ),z( strtrw, start ), & + ldz, work( 1 ), n,work( storez ), iwork, info ) + if( info/=0 ) then + info = ( info / ( m+1 )+start-1 )*( n+1 ) +mod( info, ( m+1 ) ) + start - & + 1 + go to 50 + end if + ! scale back. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, m, 1, d( start ), m,info ) + else + if( icompz==1 ) then + ! since qr won't update a z matrix which is larger than + ! the length of d, we must solve the sub-problem in a + ! workspace and then multiply back into z. + call stdlib_ssteqr( 'I', m, d( start ), e( start ), work, m,work( m*m+1 ), & + info ) + call stdlib_slacpy( 'A', n, m, z( 1, start ), ldz,work( storez ), n ) + + call stdlib_sgemm( 'N', 'N', n, m, m, one,work( storez ), n, work, m, zero,& + z( 1, start ), ldz ) + else if( icompz==2 ) then + call stdlib_ssteqr( 'I', m, d( start ), e( start ),z( start, start ), ldz, & + work, info ) + else + call stdlib_ssterf( m, d( start ), e( start ), info ) + end if + if( info/=0 ) then + info = start*( n+1 ) + finish + go to 50 + end if + end if + start = finish + 1 + go to 10 + end if + ! endwhile + if( icompz==0 ) then + ! use quick sort + call stdlib_slasrt( 'I', n, d, info ) + else + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

SSTEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric tridiagonal matrix. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_sstevd( jobz, n, d, e, z, ldz, work, lwork, iwork,liwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iscale, liwmin, lwmin + real(sp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tnrm + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + liwmin = 1 + lwmin = 1 + if( n>1 .and. wantz ) then + lwmin = 1 + 4*n + n**2 + liwmin = 3 + 5*n + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_sscal( n, sigma, d, 1 ) + call stdlib_sscal( n-1, sigma, e( 1 ), 1 ) + end if + ! for eigenvalues only, call stdlib_ssterf. for eigenvalues and + ! eigenvectors, call stdlib_sstedc. + if( .not.wantz ) then + call stdlib_ssterf( n, d, e, info ) + else + call stdlib_sstedc( 'I', n, d, e, z, ldz, work, lwork, iwork, liwork,info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_sscal( n, one / sigma, d, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_sstevd + + !> SSYEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> real symmetric matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + !> Because of large use of BLAS of level 3, SSYEVD needs N**2 more + !> workspace than SSYEVX. + + subroutine stdlib_ssyevd( jobz, uplo, n, a, lda, w, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, inde, indtau, indwk2, indwrk, iscale, liopt, liwmin, llwork, & + llwrk2, lopt, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_slascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_ssytrd to reduce symmetric matrix to tridiagonal form. + inde = 1 + indtau = inde + n + indwrk = indtau + n + llwork = lwork - indwrk + 1 + indwk2 = indwrk + n*n + llwrk2 = lwork - indwk2 + 1 + call stdlib_ssytrd( uplo, n, a, lda, w, work( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_sstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_sormtr to multiply it by the + ! householder transformations stored in a. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + call stdlib_sstedc( 'I', n, w, work( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, iwork, liwork, info ) + call stdlib_sormtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_slacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_sscal( n, one / sigma, w, 1 ) + work( 1 ) = lopt + iwork( 1 ) = liopt + return + end subroutine stdlib_ssyevd + + !> SSYGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_ssygvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lwmin + ! Intrinsic Functions + intrinsic :: max,real + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 6*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + 1 + end if + lopt = lwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda SSBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a real symmetric band matrix A. If eigenvectors are desired, it uses + !> a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_ssbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, iwork, liwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ab(ldab,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, inde, indwk2, indwrk, iscale, liwmin, llwrk2, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else + if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 5*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + end if + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_slascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_slascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_ssbtrd to reduce symmetric band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + indwk2 = indwrk + n*n + llwrk2 = lwork - indwk2 + 1 + call stdlib_ssbtrd( jobz, uplo, n, kd, ab, ldab, w, work( inde ), z, ldz,work( indwrk )& + , iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, call stdlib_sstedc. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + call stdlib_sstedc( 'I', n, w, work( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, iwork, liwork, info ) + call stdlib_sgemm( 'N', 'N', n, n, n, one, z, ldz, work( indwrk ), n,zero, work( & + indwk2 ), n ) + call stdlib_slacpy( 'A', n, n, work( indwk2 ), n, z, ldz ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_sscal( n, one / sigma, w, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_ssbevd + + !> SSBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite banded eigenproblem, of the + !> form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and + !> banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_ssbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwk2, indwrk, liwmin, llwrk2, lwmin + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + liwmin = 1 + lwmin = 1 + else if( wantz ) then + liwmin = 3 + 5*n + lwmin = 1 + 5*n + 2*n**2 + else + liwmin = 1 + lwmin = 2*n + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab SSPEVD: computes all the eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_sspevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,iwork, liwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ap(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iinfo, inde, indtau, indwrk, iscale, liwmin, llwork, lwmin + real(sp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_sscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_ssptrd to reduce symmetric packed matrix to tridiagonal form. + inde = 1 + indtau = inde + n + call stdlib_ssptrd( uplo, n, ap, w, work( inde ), work( indtau ), iinfo ) + ! for eigenvalues only, call stdlib_ssterf. for eigenvectors, first call + ! stdlib_sstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_sopmtr to multiply it by the + ! householder transformations represented in ap. + if( .not.wantz ) then + call stdlib_ssterf( n, w, work( inde ), info ) + else + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_sstedc( 'I', n, w, work( inde ), z, ldz, work( indwrk ),llwork, iwork, & + liwork, info ) + call stdlib_sopmtr( 'L', uplo, 'N', n, n, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 )call stdlib_sscal( n, one / sigma, w, 1 ) + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_sspevd + + !> SSPGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a real generalized symmetric-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be symmetric, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_sspgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, liwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: ap(*), bp(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: j, liwmin, lwmin, neig + ! Intrinsic Functions + intrinsic :: max,real + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**t *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'T' + end if + do j = 1, neig + call stdlib_stpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**t *y + if( upper ) then + trans = 'T' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_stpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_sspgvd + + !> SBDSDC: computes the singular value decomposition (SVD) of a real + !> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + !> using a divide and conquer method, where S is a diagonal matrix + !> with non-negative diagonal elements (the singular values of B), and + !> U and VT are orthogonal matrices of left and right singular vectors, + !> respectively. SBDSDC can be used to compute all singular values, + !> and optionally, singular vectors or singular vectors in compact form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See SLASD3 for details. + !> The code currently calls SLASDQ if singular values only are desired. + !> However, it can be slightly modified to compute singular values + !> using the divide and conquer method. + + pure subroutine stdlib_sbdsdc( uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq,work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n + ! Array Arguments + integer(ilp), intent(out) :: iq(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: q(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! changed dimension statement in comment describing e from (n) to + ! (n-1). sven, 17 feb 05. + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: difl, difr, givcol, givnum, givptr, i, ic, icompq, ierr, ii, is, iu, & + iuplo, ivt, j, k, kk, mlvl, nm1, nsize, perm, poles, qstart, smlsiz, smlszp, sqre, & + start, wstart, z + real(sp) :: cs, eps, orgnrm, p, r, sn + ! Intrinsic Functions + intrinsic :: real,abs,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( stdlib_lsame( compq, 'N' ) ) then + icompq = 0 + else if( stdlib_lsame( compq, 'P' ) ) then + icompq = 1 + else if( stdlib_lsame( compq, 'I' ) ) then + icompq = 2 + else + icompq = -1 + end if + if( iuplo==0 ) then + info = -1 + else if( icompq<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( ldu<1 ) .or. ( ( icompq==2 ) .and. ( ldu=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - start + 1 + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n). solve this 1-by-1 problem + ! first. + nsize = i - start + 1 + if( icompq==2 ) then + u( n, n ) = sign( one, d( n ) ) + vt( n, n ) = one + else if( icompq==1 ) then + q( n+( qstart-1 )*n ) = sign( one, d( n ) ) + q( n+( smlsiz+qstart-1 )*n ) = one + end if + d( n ) = abs( d( n ) ) + end if + if( icompq==2 ) then + call stdlib_slasd0( nsize, sqre, d( start ), e( start ),u( start, start ), & + ldu, vt( start, start ),ldvt, smlsiz, iwork, work( wstart ), info ) + else + call stdlib_slasda( icompq, smlsiz, nsize, sqre, d( start ),e( start ), q( & + start+( iu+qstart-2 )*n ), n,q( start+( ivt+qstart-2 )*n ),iq( start+k*n ), q(& + start+( difl+qstart-2 )*n ), q( start+( difr+qstart-2 )*n ),q( start+( z+& + qstart-2 )*n ),q( start+( poles+qstart-2 )*n ),iq( start+givptr*n ), iq( & + start+givcol*n ),n, iq( start+perm*n ),q( start+( givnum+qstart-2 )*n ),q( & + start+( ic+qstart-2 )*n ),q( start+( is+qstart-2 )*n ),work( wstart ), iwork,& + info ) + end if + if( info/=0 ) then + return + end if + start = i + 1 + end if + end do loop_30 + ! unscale + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, ierr ) + 40 continue + ! use selection sort to minimize swaps of singular vectors + do ii = 2, n + i = ii - 1 + kk = i + p = d( i ) + do j = ii, n + if( d( j )>p ) then + kk = j + p = d( j ) + end if + end do + if( kk/=i ) then + d( kk ) = d( i ) + d( i ) = p + if( icompq==1 ) then + iq( i ) = kk + else if( icompq==2 ) then + call stdlib_sswap( n, u( 1, i ), 1, u( 1, kk ), 1 ) + call stdlib_sswap( n, vt( i, 1 ), ldvt, vt( kk, 1 ), ldvt ) + end if + else if( icompq==1 ) then + iq( i ) = i + end if + end do + ! if icompq = 1, use iq(n,1) as the indicator for uplo + if( icompq==1 ) then + if( iuplo==1 ) then + iq( n ) = 1 + else + iq( n ) = 0 + end if + end if + ! if b is lower bidiagonal, update u by those givens rotations + ! which rotated b to be upper bidiagonal + if( ( iuplo==2 ) .and. ( icompq==2 ) )call stdlib_slasr( 'L', 'V', 'B', n, n, work( 1 )& + , work( n ), u, ldu ) + return + end subroutine stdlib_sbdsdc + + !> SBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**T + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**T*VT instead of + !> P**T, for given real input matrices U and VT. When U and VT are the + !> orthogonal matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by SGEBRD, then + !> A = (U*Q) * S * (P**T*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**T*C + !> for a given real input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_sbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, work, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: hndrth = 0.01_sp + real(sp), parameter :: hndrd = 100.0_sp + real(sp), parameter :: meigth = -0.125_sp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, iterdivn, j, ll, lll, m, maxitdivn, nm1, nm12, & + nm13, oldll, oldm + real(sp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_slasq1( n, d, e, work, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_slamch( 'EPSILON' ) + unfl = stdlib_slamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + work( i ) = cs + work( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_slasr( 'R', 'V', 'F', nru, n, work( 1 ), work( n ), u,ldu ) + + if( ncc>0 )call stdlib_slasr( 'L', 'V', 'F', n, ncc, work( 1 ), work( n ), c,ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=sp) ) + thresh = max( tol*sminoa, maxitr*(n*(n*unfl)) ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*(n*(n*unfl)) ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxitdivn = maxitr*n + iterdivn = 0 + iter = -1 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>=n ) then + iter = iter - n + iterdivn = iterdivn + 1 + if( iterdivn>=maxitdivn )go to 200 + end if + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_srot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt, cosr,sinr & + ) + if( nru>0 )call stdlib_srot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + if( ncc>0 )call stdlib_srot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_slas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_slas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_slartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + work( i-ll+1 ) = cs + work( i-ll+1+nm1 ) = sn + work( i-ll+1+nm12 ) = oldcs + work( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_slasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_slasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_slasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_slartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_slasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_slasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_slasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_slartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_slartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_slasr( 'L', 'V', 'F', m-ll+1, ncvt, work( 1 ),work( n ), & + vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_slasr( 'R', 'V', 'F', nru, m-ll+1, work( nm12+1 ),work( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_slasr( 'L', 'V', 'F', m-ll+1, ncc, work( nm12+1 ),work( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_slartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + work( i-ll ) = cosr + work( i-ll+nm1 ) = -sinr + work( i-ll+nm12 ) = cosl + work( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_slasr( 'L', 'V', 'B', m-ll+1, ncvt, work( nm12+1 ),work( & + nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_slasr( 'R', 'V', 'B', nru, m-ll+1, work( 1 ),work( n ), u(& + 1, ll ), ldu ) + if( ncc>0 )call stdlib_slasr( 'L', 'V', 'B', m-ll+1, ncc, work( 1 ),work( n ), c(& + ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_sscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_sswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_sswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_sswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_sbdsqr + + !> SGEES: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A matrix is in real Schur form if it is upper quasi-triangular with + !> 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the + !> form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_sgees( jobvs, sort, select, n, a, lda, sdim, wr, wi,vs, ldvs, work, lwork, & + bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_s) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantst, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, maxwrk, minwrk + real(sp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_slascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (workspace: need n) + ibal = 1 + call stdlib_sgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_sgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_slacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (workspace: none needed) + call stdlib_strsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, s, sep, & + work( iwrk ), lwork-iwrk+1, idum, 1,icond ) + if( icond>0 )info = n + icond + end if + if( wantvs ) then + ! undo balancing + ! (workspace: need n) + call stdlib_sgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_slascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_scopy( n, a, lda+1, wr, 1 ) + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi,max( ilo-1, 1 ), & + ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_sswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_sswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_sswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + ! undo scaling for the imaginary part of the eigenvalues + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_sgees + + !> SGEESX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues, the real Schur form T, and, optionally, the matrix of + !> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> real Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_sp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A real matrix is in real Schur form if it is upper quasi-triangular + !> with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in + !> the form + !> [ a b ] + !> [ c a ] + !> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). + + subroutine stdlib_sgeesx( jobvs, sort, select, sense, n, a, lda, sdim,wr, wi, vs, ldvs, & + rconde, rcondv, work, lwork,iwork, liwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, liwork, lwork, n + real(sp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: vs(ldvs,*), wi(*), work(*), wr(*) + ! Function Arguments + procedure(stdlib_select_s) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, lastsl, lquery, lst2sl, scalea, wantsb, wantse, wantsn, wantst, & + wantsv, wantvs + integer(ilp) :: hswork, i, i1, i2, ibal, icond, ierr, ieval, ihi, ilo, inxt, ip, itau, & + iwrk, lwrk, liwrk, maxwrk, minwrk + real(sp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_slascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (rworkspace: need n) + ibal = 1 + call stdlib_sgebal( 'P', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (rworkspace: need 3*n, prefer 2*n+n*nb) + itau = n + ibal + iwrk = n + itau + call stdlib_sgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_slacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate orthogonal matrix in vs + ! (rworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (rworkspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', jobvs, n, ilo, ihi, a, lda, wr, wi, vs, ldvs,work( iwrk ), & + lwork-iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea ) then + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n, 1, wr, n, ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n, 1, wi, n, ierr ) + end if + do i = 1, n + bwork( i ) = select( wr( i ), wi( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (rworkspace: if sense is not 'n', need n+2*sdim*(n-sdim) + ! otherwise, need n ) + ! (iworkspace: if sense is 'v' or 'b', need sdim*(n-sdim) + ! otherwise, need 0 ) + call stdlib_strsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, wr, wi,sdim, rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,iwork, liwork, icond ) + if( .not.wantsn )maxwrk = max( maxwrk, n+2*sdim*( n-sdim ) ) + if( icond==-15 ) then + ! not enough real workspace + info = -16 + else if( icond==-17 ) then + ! not enough integer workspace + info = -18 + else if( icond>0 ) then + ! stdlib_strsen failed to reorder or to restore standard schur form + info = icond + n + end if + end if + if( wantvs ) then + ! undo balancing + ! (rworkspace: need n) + call stdlib_sgebak( 'P', 'R', n, ilo, ihi, work( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_slascl( 'H', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_scopy( n, a, lda+1, wr, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_slascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + if( cscale==smlnum ) then + ! if scaling back towards underflow, adjust wi if an + ! offdiagonal element of a 2-by-2 block in the schur form + ! underflows. + if( ieval>0 ) then + i1 = ieval + 1 + i2 = ihi - 1 + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + else if( wantst ) then + i1 = 1 + i2 = n - 1 + else + i1 = ilo + i2 = ihi - 1 + end if + inxt = i1 - 1 + loop_20: do i = i1, i2 + if( i1 )call stdlib_sswap( i-1, a( 1, i ), 1, a( 1, i+1 ), 1 ) + if( n>i+1 )call stdlib_sswap( n-i-1, a( i, i+2 ), lda,a( i+1, i+2 ), & + lda ) + if( wantvs ) then + call stdlib_sswap( n, vs( 1, i ), 1, vs( 1, i+1 ), 1 ) + end if + a( i, i+1 ) = a( i+1, i ) + a( i+1, i ) = zero + end if + inxt = i + 2 + end if + end do loop_20 + end if + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-ieval, 1,wi( ieval+1 ), max( n-ieval,& + 1 ), ierr ) + end if + if( wantst .and. info==0 ) then + ! check if reordering successful + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = select( wr( i ), wi( i ) ) + if( wi( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + work( 1 ) = maxwrk + if( wantsv .or. wantsb ) then + iwork( 1 ) = sdim*(n-sdim) + else + iwork( 1 ) = 1 + end if + return + end subroutine stdlib_sgeesx + + !> SGEEV: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_sgeev( jobvl, jobvr, n, a, lda, wr, wi, vl, ldvl, vr,ldvr, work, lwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: vl(ldvl,*), vr(ldvr,*), wi(*), work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, itau, iwrk, k, lwork_trevc, maxwrk, & + minwrk, nout + real(sp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_slascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (workspace: need n) + ibal = 1 + call stdlib_sgebal( 'B', n, a, lda, ilo, ihi, work( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (workspace: need 3*n, prefer 2*n+n*nb) + itau = ibal + n + iwrk = itau + n + call stdlib_sgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_slacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_slacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_slacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 3*n-1, prefer 2*n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! (workspace: need n+1, prefer n+hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'E', 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_shseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 4*n, prefer n + n + 2*n*nb) + call stdlib_strevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (workspace: need n) + call stdlib_sgebak( 'B', 'L', n, ilo, ihi, work( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_snrm2( n, vl( 1, i ), 1 ) + call stdlib_sscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_slapy2( stdlib_snrm2( n, vl( 1, i ), 1 ),stdlib_snrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_sscal( n, scl, vl( 1, i ), 1 ) + call stdlib_sscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_isamax( n, work( iwrk ), 1 ) + call stdlib_slartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_srot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (workspace: need n) + call stdlib_sgebak( 'B', 'R', n, ilo, ihi, work( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_snrm2( n, vr( 1, i ), 1 ) + call stdlib_sscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_slapy2( stdlib_snrm2( n, vr( 1, i ), 1 ),stdlib_snrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_sscal( n, scl, vr( 1, i ), 1 ) + call stdlib_sscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( iwrk+k-1 ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_isamax( n, work( iwrk ), 1 ) + call stdlib_slartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_srot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info>0 ) then + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_sgeev + + !> SGEEVX: computes for an N-by-N real nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate-transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_sp of the LAPACK + !> Users' Guide. + + subroutine stdlib_sgeevx( balanc, jobvl, jobvr, sense, n, a, lda, wr, wi,vl, ldvl, vr, ldvr, & + ilo, ihi, scale, abnrm,rconde, rcondv, work, lwork, iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(sp), intent(out) :: abnrm + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: rconde(*), rcondv(*), scale(*), vl(ldvl,*), vr(ldvr,*), wi(*),& + work(*), wr(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(sp) :: anrm, bignum, cs, cscale, eps, r, scl, smlnum, sn + ! Local Arrays + logical(lk) :: select(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ).or. & + stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) )then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_slascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_sgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_slange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_slascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (workspace: need 2*n, prefer n+n*nb) + itau = 1 + iwrk = itau + n + call stdlib_sgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_slacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate orthogonal matrix in vl + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vl, ldvl,work( iwrk ), & + lwork-iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_slacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_slacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate orthogonal matrix in vr + ! (workspace: need 2*n-1, prefer n+(n-1)*nb) + call stdlib_sorghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( 'S', 'V', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (workspace: need 1, prefer hswork (see comments) ) + iwrk = itau + call stdlib_shseqr( job, 'N', n, ilo, ihi, a, lda, wr, wi, vr, ldvr,work( iwrk ), & + lwork-iwrk+1, info ) + end if + ! if info /= 0 from stdlib_shseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (workspace: need 3*n, prefer n + 2*n*nb) + call stdlib_strevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1, ierr ) + end if + ! compute condition numbers if desired + ! (workspace: need n*n+6*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_strsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, iwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_sgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_snrm2( n, vl( 1, i ), 1 ) + call stdlib_sscal( n, scl, vl( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_slapy2( stdlib_snrm2( n, vl( 1, i ), 1 ),stdlib_snrm2( n, & + vl( 1, i+1 ), 1 ) ) + call stdlib_sscal( n, scl, vl( 1, i ), 1 ) + call stdlib_sscal( n, scl, vl( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vl( k, i )**2 + vl( k, i+1 )**2 + end do + k = stdlib_isamax( n, work, 1 ) + call stdlib_slartg( vl( k, i ), vl( k, i+1 ), cs, sn, r ) + call stdlib_srot( n, vl( 1, i ), 1, vl( 1, i+1 ), 1, cs, sn ) + vl( k, i+1 ) = zero + end if + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_sgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + if( wi( i )==zero ) then + scl = one / stdlib_snrm2( n, vr( 1, i ), 1 ) + call stdlib_sscal( n, scl, vr( 1, i ), 1 ) + else if( wi( i )>zero ) then + scl = one / stdlib_slapy2( stdlib_snrm2( n, vr( 1, i ), 1 ),stdlib_snrm2( n, & + vr( 1, i+1 ), 1 ) ) + call stdlib_sscal( n, scl, vr( 1, i ), 1 ) + call stdlib_sscal( n, scl, vr( 1, i+1 ), 1 ) + do k = 1, n + work( k ) = vr( k, i )**2 + vr( k, i+1 )**2 + end do + k = stdlib_isamax( n, work, 1 ) + call stdlib_slartg( vr( k, i ), vr( k, i+1 ), cs, sn, r ) + call stdlib_srot( n, vr( 1, i ), 1, vr( 1, i+1 ), 1, cs, sn ) + vr( k, i+1 ) = zero + end if + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-info, 1, wr( info+1 ),max( n-info, 1 & + ), ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, n-info, 1, wi( info+1 ),max( n-info, 1 & + ), ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_slascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wr, n,ierr ) + call stdlib_slascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, wi, n,ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_sgeevx + + !> SGEJSV: computes the singular value decomposition (SVD) of a real M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^t, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and + !> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + !> SGEJSV can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_sgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,work, lwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + ! Local Scalars + real(sp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & + l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + ! Intrinsic Functions + intrinsic :: abs,log,max,min,float,nint,sign,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& + then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & + .or. ( jracc .and. (.not.lsvec) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7,4*n+1,2*m+n))) .or.(& + .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7,4*n+n*n,2*m+n))) .or.(lsvec & + .and. (.not.rsvec) .and. (lwork < max(7,2*m+n,4*n+1))).or.(rsvec .and. (.not.lsvec) & + .and. (lwork < max(7,2*m+n,4*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& + lwork big ) then + info = - 9 + call stdlib_xerbla( 'SGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_sscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_slaset( 'G', m, n1, zero, one, u, ldu ) + if ( rsvec ) call stdlib_slaset( 'G', n, n, zero, one, v, ldv ) + work(1) = one + work(2) = one + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_slascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_slacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_sgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) + call stdlib_sorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr ) + call stdlib_scopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = one + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + work(1) = one / scalem + work(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + if ( errest ) work(3) = one + if ( lsvec .and. rsvec ) then + work(4) = one + work(5) = one + end if + if ( l2tran ) then + work(6) = zero + work(7) = zero + end if + return + end if + transp = .false. + l2tran = l2tran .and. ( m == n ) + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^t and a^t * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_slassq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_slassq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + work(m+n+p) = xsc * scalem + work(n+p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, work(n+p) ) + if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) + end do + else + do p = 1, m + work(m+n+p) = scalem*abs( a(p,stdlib_isamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, work(m+n+p) ) + aatmin = min( aatmin, work(m+n+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^t would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_slassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=sp)) + ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^t * a. do the same with the + ! diagonal of a * a^t, compute the entropy of the corresponding + ! probability distribution. note that a * a^t and a^t * a have the + ! same trace. + entrat = zero + do p = n+1, n+m + big1 = ( ( work(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=sp)) + ! analyze the entropies and decide a or a^t. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^t is better than a, transpose a. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + do q = p + 1, n + temp1 = a(q,p) + a(q,p) = a(p,q) + a(p,q) = temp1 + end do + end do + do p = 1, n + work(m+n+p) = sva(p) + sva(p) = work(n+p) + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_sgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_sgesvj will compute them. so, in that case, + ! one should use stdlib_sgesvj instead of stdlib_sgejsv. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=sp) ) + call stdlib_slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_slascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_sgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=sp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_slacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_sscal( p, one/temp1, v(1,p), 1 ) + end do + call stdlib_spocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_slacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_sscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_spocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2*n+m+1), & + ierr ) + else + call stdlib_slacpy( 'U', n, n, a, lda, work(n+1), n ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_sscal( p, one/temp1, work(n+(p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + call stdlib_spocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2*n+& + m+1), ierr ) + end if + sconda = one / sqrt(temp1) + ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_scopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=sp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_slaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_sgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_scopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=sp) + do q = 1, nr + temp1 = xsc*abs(a(q,q)) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & + temp1, a(p,q) ) + end do + end do + else + call stdlib_slaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_sgesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & + lwork, info ) + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_scopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + call stdlib_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_sgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) + + scalem = work(1) + numrank = nint(work(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_slaset( 'LOWER', nr-1, nr-1, zero, zero, a(2,1), lda ) + call stdlib_sgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) + call stdlib_slacpy( 'LOWER', nr, nr, a, lda, v, ldv ) + call stdlib_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_sgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_scopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + end do + call stdlib_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1,2), ldv ) + call stdlib_sgesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & + lwork-n, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < n ) then + call stdlib_slaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv ) + call stdlib_slaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv ) + call stdlib_slaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) + end if + call stdlib_sormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & + lwork-n, ierr ) + end if + do p = 1, n + call stdlib_scopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + end do + call stdlib_slacpy( 'ALL', n, n, a, lda, v, ldv ) + if ( transp ) then + call stdlib_slacpy( 'ALL', n, n, v, ldv, u, ldu ) + end if + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_scopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + end do + call stdlib_slaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_sgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),lwork-2*n, ierr ) + do p = 1, nr - 1 + call stdlib_scopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + end do + call stdlib_slaset( 'UPPER', nr-1, nr-1, zero, zero, u(1,2), ldu ) + call stdlib_sgesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & + lwork-n, info ) + scalem = work(n+1) + numrank = nint(work(n+2),KIND=ilp) + if ( nr < m ) then + call stdlib_slaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu ) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + call stdlib_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_snrm2( m, u(1,p), 1 ) + call stdlib_sscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_slacpy( 'ALL', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_sgejsv. + do p = 1, nr + call stdlib_scopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + sign( temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_slacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_snrm2(nr-p+1,work(2*n+(p-1)*nr+p),1) + call stdlib_sscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1) + end do + call stdlib_spocon('LOWER',nr,work(2*n+1),nr,one,temp1,work(2*n+nr*nr+1),iwork(m+& + 2*n+1),ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=sp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=sp)) + cond_ok = sqrt(real(nr,KIND=sp)) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^t = q2 * r2 + call stdlib_sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + if ( nr /= n )call stdlib_slacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_scopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + end do + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_sgeqp3 + ! should be replaced with eg. call sgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^t * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_sgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2*n+1), lwork-& + 2*n, ierr ) + ! * call stdlib_sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) + end do + end do + end if + call stdlib_slacpy( 'A', n, nr, v, ldv, work(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) + v(p,q) = - sign( temp1, v(q,p) ) + end do + end do + else + call stdlib_slaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_sgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),work(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_slacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_snrm2( p, work(2*n+n*nr+nr+p), nr ) + call stdlib_sscal( p, one/temp1, work(2*n+n*nr+nr+p), nr ) + end do + call stdlib_spocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,work(2*n+n*nr+nr+& + nr*nr+1),iwork(m+2*n+1),ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_slacpy( 'U', nr, nr, v, ldv, work(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + temp1 = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - sign( temp1, v(q,p) ) + v(p,q) = - sign( temp1, v(p,q) ) + end do + end do + else + call stdlib_slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_sgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2*n+n*nr+nr+1),& + lwork-2*n-n*nr-nr,info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_scopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_sscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_sgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_strsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) + else + ! .. r1 is well conditioned, but non-square. transpose(r2) + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_sgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_strsm('L','U','T','N',nr,nr,one,work(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + end if + call stdlib_sormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! :) .. the input matrix a is very likely a relative of + ! the kahan matrix :) + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^t*v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_sgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + do p = 1, nr + call stdlib_scopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_sscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_strsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_sgejsv completes the task. + ! compute the full svd of l3 using stdlib_sgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_sgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr, info ) + scalem = work(2*n+n*nr+nr+1) + numrank = nint(work(2*n+n*nr+nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_sormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,work(2*n+n*nr+1), u, & + ldu, work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + work(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = work(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=sp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_snrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_sscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_slaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_sormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=sp)) * epsln + do p = 1, nr + xsc = one / stdlib_snrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_sscal( m, xsc, & + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_slacpy( 'UPPER', n, n, a, lda, work(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + temp1 = xsc * work( n + (p-1)*n + p ) + do q = 1, p - 1 + work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) + end do + end do + else + call stdlib_slaset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) + end if + call stdlib_sgesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& + n*n+1), lwork-n-n*n, info ) + scalem = work(n+n*n+1) + numrank = nint(work(n+n*n+2),KIND=ilp) + do p = 1, n + call stdlib_scopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_sscal( n, sva(p), work(n+(p-1)*n+1), 1 ) + end do + call stdlib_strsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& + 1), n ) + do p = 1, n + call stdlib_scopy( n, work(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=sp))*epsln + do p = 1, n + xsc = one / stdlib_snrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_sscal( n, xsc, & + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_slaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_slaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu ) + call stdlib_slaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) + end if + end if + call stdlib_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + temp1 = sqrt(real(m,KIND=sp))*epsln + do p = 1, n1 + xsc = one / stdlib_snrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_sscal( m, xsc, & + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values. since that is not always the case, ... + do p = 1, nr + call stdlib_scopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + temp1 = xsc*abs( v(q,q) ) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& + temp1, v(p,q) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv ) + end if + call stdlib_sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),lwork-2*n, ierr ) + call stdlib_slacpy( 'L', n, nr, v, ldv, work(2*n+1), n ) + do p = 1, nr + call stdlib_scopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) + u(p,q) = - sign( temp1, u(q,p) ) + end do + end do + else + call stdlib_slaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu ) + end if + call stdlib_sgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2*n+n*nr+1), & + lwork-2*n-n*nr, info ) + scalem = work(2*n+n*nr+1) + numrank = nint(work(2*n+n*nr+2),KIND=ilp) + if ( nr < n ) then + call stdlib_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv ) + call stdlib_slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv ) + call stdlib_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) + end if + call stdlib_sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),v,ldv,work(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=sp)) * epsln + do q = 1, n + do p = 1, n + work(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = work(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_snrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_sscal( n, xsc, & + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu ) + call stdlib_slaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & + lwork-n, ierr ) + if ( rowpiv )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^t + do p = 1, n + call stdlib_sswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + work(1) = uscal2 * scalem + work(2) = uscal1 + if ( errest ) work(3) = sconda + if ( lsvec .and. rsvec ) then + work(4) = condr1 + work(5) = condr2 + end if + if ( l2tran ) then + work(6) = entra + work(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + return + end subroutine stdlib_sgejsv + + !> SGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_sgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond,rank, work, lwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & + maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd + real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: int,log,max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + smlsiz = stdlib_ilaenv( 9, 'SGELSD', ' ', 0, 0, 0, 0 ) + mnthr = stdlib_ilaenv( 6, 'SGELSD', ' ', m, n, nrhs, -1 ) + nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1,KIND=sp) ) /log( & + two ),KIND=ilp) + 1, 0 ) + liwork = 3*minmn*nlvl + 11*minmn + mm = m + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns. + mm = n + maxwrk = max( maxwrk, n + n*stdlib_ilaenv( 1, 'SGEQRF', ' ', m,n, -1, -1 ) ) + + maxwrk = max( maxwrk, n + nrhs*stdlib_ilaenv( 1, 'SORMQR', 'LT',m, nrhs, n, -& + 1 ) ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + maxwrk = max( maxwrk, 3*n + ( mm + n )*stdlib_ilaenv( 1,'SGEBRD', ' ', mm, n, & + -1, -1 ) ) + maxwrk = max( maxwrk, 3*n + nrhs*stdlib_ilaenv( 1, 'SORMBR','QLT', mm, nrhs, & + n, -1 ) ) + maxwrk = max( maxwrk, 3*n + ( n - 1 )*stdlib_ilaenv( 1,'SORMBR', 'PLN', n, & + nrhs, n, -1 ) ) + wlalsd = 9*n + 2*n*smlsiz + 8*n*nlvl + n*nrhs +( smlsiz + 1 )**2 + maxwrk = max( maxwrk, 3*n + wlalsd ) + minwrk = max( 3*n + mm, 3*n + nrhs, 3*n + wlalsd ) + end if + if( n>m ) then + wlalsd = 9*m + 2*m*smlsiz + 8*m*nlvl + m*nrhs +( smlsiz + 1 )**2 + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'SGELQF', ' ', m, n, -1,-1 ) + maxwrk = max( maxwrk, m*m + 4*m + 2*m*stdlib_ilaenv( 1,'SGEBRD', ' ', m, m,& + -1, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + nrhs*stdlib_ilaenv( 1,'SORMBR', 'QLT', m,& + nrhs, m, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + ( m - 1 )*stdlib_ilaenv( 1,'SORMBR', & + 'PLN', m, nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + nrhs*stdlib_ilaenv( 1, 'SORMLQ','LT', n, nrhs, m,& + -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + wlalsd ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - remaining underdetermined cases. + maxwrk = 3*m + ( n + m )*stdlib_ilaenv( 1, 'SGEBRD', ' ', m,n, -1, -1 ) + + maxwrk = max( maxwrk, 3*m + nrhs*stdlib_ilaenv( 1, 'SORMBR','QLT', m, nrhs,& + n, -1 ) ) + maxwrk = max( maxwrk, 3*m + m*stdlib_ilaenv( 1, 'SORMBR','PLN', n, nrhs, m,& + -1 ) ) + maxwrk = max( maxwrk, 3*m + wlalsd ) + end if + minwrk = max( 3*m + nrhs, 3*m + m, 3*m + wlalsd ) + end if + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_slaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_slange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_slascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure certain entries of b are zero. + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns. + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a. + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_slalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_sormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m, wlalsd ) ) & + then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4*m+m*lda+& + wlalsd ) )ldwork = lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_slacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il). + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_slalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_sormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a. + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_slalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & + nwork ), iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_sormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_slascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_slascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_slascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_slascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + return + end subroutine stdlib_sgelsd + + !> SGELSS: computes the minimum norm solution to a real linear least + !> squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(sp), intent(in) :: rcond + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: s(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_sgeqrf, lwork_sormqr, lwork_sgebrd, lwork_sormbr, lwork_sorgbr, & + lwork_sormlq + real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'SGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_sgeqrf + call stdlib_sgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_sgeqrf=dum(1) + ! compute space needed for stdlib_sormqr + call stdlib_sormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_sormqr=dum(1) + mm = n + maxwrk = max( maxwrk, n + lwork_sgeqrf ) + maxwrk = max( maxwrk, n + lwork_sormqr ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute workspace needed for stdlib_sbdsqr + bdspac = max( 1, 5*n ) + ! compute space needed for stdlib_sgebrd + call stdlib_sgebrd( mm, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, info & + ) + lwork_sgebrd=dum(1) + ! compute space needed for stdlib_sormbr + call stdlib_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_sormbr=dum(1) + ! compute space needed for stdlib_sorgbr + call stdlib_sorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_sorgbr=dum(1) + ! compute total workspace needed + maxwrk = max( maxwrk, 3*n + lwork_sgebrd ) + maxwrk = max( maxwrk, 3*n + lwork_sormbr ) + maxwrk = max( maxwrk, 3*n + lwork_sorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = max( 3*n + mm, 3*n + nrhs, bdspac ) + maxwrk = max( minwrk, maxwrk ) + end if + if( n>m ) then + ! compute workspace needed for stdlib_sbdsqr + bdspac = max( 1, 5*m ) + minwrk = max( 3*m+nrhs, 3*m+n, bdspac ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_sgebrd + call stdlib_sgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_sgebrd=dum(1) + ! compute space needed for stdlib_sormbr + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_sormbr=dum(1) + ! compute space needed for stdlib_sorgbr + call stdlib_sorgbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_sorgbr=dum(1) + ! compute space needed for stdlib_sormlq + call stdlib_sormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_sormlq=dum(1) + ! compute total workspace needed + maxwrk = m + m*stdlib_ilaenv( 1, 'SGELQF', ' ', m, n, -1,-1 ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_sgebrd ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_sormbr ) + maxwrk = max( maxwrk, m*m + 4*m + lwork_sorgbr ) + maxwrk = max( maxwrk, m*m + m + bdspac ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_sormlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_sgebrd + call stdlib_sgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, & + info ) + lwork_sgebrd=dum(1) + ! compute space needed for stdlib_sormbr + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_sormbr=dum(1) + ! compute space needed for stdlib_sorgbr + call stdlib_sorgbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_sorgbr=dum(1) + maxwrk = 3*m + lwork_sgebrd + maxwrk = max( maxwrk, 3*m + lwork_sormbr ) + maxwrk = max( maxwrk, 3*m + lwork_sorgbr ) + maxwrk = max( maxwrk, bdspac ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) + call stdlib_slaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', m, nrhs, b, ldb, work ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_slascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (workspace: need n+nrhs, prefer n+nrhs*nb) + call stdlib_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) + call stdlib_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_srscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_slaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_slacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_sgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_slacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_sgemv( 'T', n, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_scopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) + ie = il + ldwork*m + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) + call stdlib_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of l in work(il) and + ! multiplying b by transpose of left singular vectors + ! (workspace: need m*m+m+bdspac) + call stdlib_sbdsqr( 'U', m, m, 0, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& + ldb, work( iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_srscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_slaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + iwork = ie + ! multiply b by right singular vectors of l in work(il) + ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_sgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & + work( iwork ), ldb ) + call stdlib_slacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_sgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1, i ), ldb,& + zero, work( iwork ), m ) + call stdlib_slacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_sgemv( 'T', m, m, one, work( il ), ldwork, b( 1, 1 ),1, zero, work( & + iwork ), 1 ) + call stdlib_scopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (workspace: need m+nrhs, prefer m+nrhs*nb) + call stdlib_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) + call stdlib_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + iwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'L', m, n, 0, nrhs, s, work( ie ), a, lda, dum,1, b, ldb, work( & + iwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_srscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_slaset( 'F', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (workspace: need n, prefer n*nrhs) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_sgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) + + call stdlib_slacpy( 'F', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_sgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1, i ),ldb, zero, work,& + n ) + call stdlib_slacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_sgemv( 'T', m, n, one, a, lda, b, 1, zero, work, 1 ) + call stdlib_scopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_slascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_slascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_slascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_slascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_sgelss + + !> SGESDD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and right singular + !> vectors. If singular vectors are desired, it uses a + !> divide-and-conquer algorithm. + !> The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**T, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_sgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & + ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl + integer(ilp) :: lwork_sgebrd_mn, lwork_sgebrd_mm, lwork_sgebrd_nn, lwork_sgelqf_mn, & + lwork_sgeqrf_mn, lwork_sorgbr_p_mm, lwork_sorgbr_q_nn, lwork_sorglq_mn, & + lwork_sorglq_nn, lwork_sorgqr_mm, lwork_sorgqr_mn, lwork_sormbr_prt_mm, & + lwork_sormbr_qln_mm, lwork_sormbr_prt_mn, lwork_sormbr_qln_mn, lwork_sormbr_prt_nn, & + lwork_sormbr_qln_nn + real(sp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! compute space needed for stdlib_sbdsdc + if( wntqn ) then + ! stdlib_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) + ! keep 7*n for backwards compatibility. + bdspac = 7*n + else + bdspac = 3*n*n + 4*n + end if + ! compute space preferred for each routine + call stdlib_sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_sgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_sgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_sgebrd_nn = int( dum(1),KIND=ilp) + call stdlib_sgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_sgeqrf_mn = int( dum(1),KIND=ilp) + call stdlib_sorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,ierr ) + lwork_sorgbr_q_nn = int( dum(1),KIND=ilp) + call stdlib_sorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_sorgqr_mm = int( dum(1),KIND=ilp) + call stdlib_sorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr ) + lwork_sorgqr_mn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_sormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_sormbr_qln_nn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_sormbr_qln_mn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_sormbr_qln_mm = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + wrkbl = n + lwork_sgeqrf_mn + wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn ) + maxwrk = max( wrkbl, bdspac + n ) + minwrk = bdspac + n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_sgeqrf_mn + wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + 2*n*n + minwrk = bdspac + 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_sgeqrf_mn + wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) + wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = bdspac + n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_sgeqrf_mn + wrkbl = max( wrkbl, n + lwork_sorgqr_mm ) + wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + n*n + minwrk = n*n + max( 3*n + bdspac, n + m ) + end if + else + ! path 5 (m >= n, but not much larger) + wrkbl = 3*n + lwork_sgebrd_mn + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*n + max( m, n*n + bdspac ) + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn ) + maxwrk = max( wrkbl, 3*n + bdspac ) + minwrk = 3*n + max( m, bdspac ) + end if + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_sbdsdc + if( wntqn ) then + ! stdlib_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) + ! keep 7*n for backwards compatibility. + bdspac = 7*m + else + bdspac = 3*m*m + 4*m + end if + ! compute space preferred for each routine + call stdlib_sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),dum(1), dum(1), -1, & + ierr ) + lwork_sgebrd_mn = int( dum(1),KIND=ilp) + call stdlib_sgebrd( m, m, a, m, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_sgebrd_mm = int( dum(1),KIND=ilp) + call stdlib_sgelqf( m, n, a, m, dum(1), dum(1), -1, ierr ) + lwork_sgelqf_mn = int( dum(1),KIND=ilp) + call stdlib_sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_sorglq_nn = int( dum(1),KIND=ilp) + call stdlib_sorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr ) + lwork_sorglq_mn = int( dum(1),KIND=ilp) + call stdlib_sorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr ) + lwork_sorgbr_p_mm = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'P', 'R', 'T', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_sormbr_prt_mm = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'P', 'R', 'T', m, n, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_sormbr_prt_mn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'P', 'R', 'T', n, n, m, dum(1), n,dum(1), dum(1), n, dum(1), & + -1, ierr ) + lwork_sormbr_prt_nn = int( dum(1),KIND=ilp) + call stdlib_sormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,dum(1), dum(1), m, dum(1), & + -1, ierr ) + lwork_sormbr_qln_mm = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + wrkbl = m + lwork_sgelqf_mn + wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm ) + maxwrk = max( wrkbl, bdspac + m ) + minwrk = bdspac + m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_sgelqf_mn + wrkbl = max( wrkbl, m + lwork_sorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + 2*m*m + minwrk = bdspac + 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_sgelqf_mn + wrkbl = max( wrkbl, m + lwork_sorglq_mn ) + wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = bdspac + m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_sgelqf_mn + wrkbl = max( wrkbl, m + lwork_sorglq_nn ) + wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*m + minwrk = m*m + max( 3*m + bdspac, m + n ) + end if + else + ! path 5t (n > m, but not much larger) + wrkbl = 3*m + lwork_sgebrd_mn + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn ) + wrkbl = max( wrkbl, 3*m + bdspac ) + maxwrk = wrkbl + m*n + minwrk = 3*m + max( n, m*m + bdspac ) + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm ) + wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_nn ) + maxwrk = max( wrkbl, 3*m + bdspac ) + minwrk = 3*m + max( n, bdspac ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = stdlib_sroundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! workspace: need n [tau] + n [work] + ! workspace: prefer n [tau] + n*nb [work] + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out below r + call stdlib_slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + n + ! perform bidiagonal svd, computing singular values only + ! workspace: need n [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz = 'o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is ldwrkr by n + if( lwork >= lda*n + n*n + 3*n + bdspac ) then + ldwrkr = lda + else + ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! work(iu) is n by n + iu = nwork + nwork = iu + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r + ! and vt by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), n, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu ), & + n, zero, work( ir ),ldwrkr ) + call stdlib_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),ldwrkr ) + ! generate q in a + ! workspace: need n*n [r] + n [tau] + n [work] + ! workspace: prefer n*n [r] + n [tau] + n*nb [work] + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagoal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! workspace: need n*n [r] + call stdlib_slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& + ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! workspace: need n*n [u] + n [tau] + n [work] + ! workspace: prefer n*n [u] + n [tau] + n*nb [work] + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! workspace: need n*n [u] + n [tau] + m [work] + ! workspace: prefer n*n [u] + n [tau] + m*nb [work] + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce r in a, zeroing out other entries + call stdlib_slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda ) + ie = itau + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] + call stdlib_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & + idum, work( nwork ), iwork,info ) + ! overwrite work(iu) by left singular vectors of r and vt + ! by right singular vectors of r + ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] + ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! workspace: need n*n [u] + call stdlib_sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& + lda ) + ! copy left singular vectors of a from a to u + call stdlib_slacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr + ! path 5 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >= n, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5o (m >= n, jobz='o') + iu = nwork + if( lwork >= m*n + 3*n + bdspac ) then + ! work( iu ) is m by n + ldwrku = m + nwork = iu + ldwrku*n + call stdlib_slaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) + ! ir is unused; silence compile warnings + ir = -1 + else + ! work( iu ) is n by n + ldwrku = n + nwork = iu + ldwrku*n + ! work(ir) is ldwrkr by n + ir = nwork + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in work(iu) and computing right + ! singular vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & + dum, idum, work( nwork ),iwork, info ) + ! overwrite vt by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*n + bdspac ) then + ! path 5o-fast + ! overwrite work(iu) by left singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork - nwork + 1, ierr ) + ! copy left singular vectors of a from work(iu) to a + call stdlib_slacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 5o-slow + ! generate q in a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] + call stdlib_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by left singular vectors of + ! bidiagonal matrix in work(iu), storing result in + ! work(ir) and copying to a + ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] + ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] + do i = 1, m, ldwrkr + chunk = min( m - i + 1, ldwrkr ) + call stdlib_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( iu )& + , ldwrku, zero,work( ir ), ldwrkr ) + call stdlib_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 5s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_slaset( 'F', m, n, zero, zero, u, ldu ) + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + n [work] + ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*n [e, tauq, taup] + bdspac + call stdlib_slaset( 'F', m, m, zero, zero, u, ldu ) + call stdlib_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of u to identity matrix + if( m>n ) then + call stdlib_slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*n [e, tauq, taup] + m [work] + ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! workspace: need m [tau] + m [work] + ! workspace: prefer m [tau] + m*nb [work] + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! zero out above l + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + nwork = ie + m + ! perform bidiagonal svd, computing singular values only + ! workspace: need m [e] + bdspac + call stdlib_sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ! work(il) is m by m; it is later resized to m by chunk for gemm + il = ivt + m*m + if( lwork >= m*n + m*m + 3*m + bdspac ) then + ldwrkl = m + chunk = n + else + ldwrkl = m + chunk = ( lwork - m*m ) / m + end if + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_slaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u, and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & + idum, work( nwork ),iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), m,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by q + ! in a, storing result in work(il) and copying to a + ! workspace: need m*m [vt] + m*m [l] + ! workspace: prefer m*m [vt] + m*n [l] + ! at this point, l is resized as m by chunk. + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1, i ), lda,& + zero, work( il ), ldwrkl ) + call stdlib_slacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_slaset( 'U', m - 1, m - 1, zero, zero,work( il + ldwrkl ), ldwrkl & + ) + ! generate q in a + ! workspace: need m*m [l] + m [tau] + m [work] + ! workspace: prefer m*m [l] + m [tau] + m*nb [work] + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(iu). + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork - nwork + 1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of l and vt + ! by right singular vectors of l + ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(il) by + ! q in a, storing result in vt + ! workspace: need m*m [l] + call stdlib_slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! workspace: need m*m [vt] + m [tau] + m [work] + ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & + 1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! workspace: need m*m [vt] + m [tau] + n [work] + ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & + nwork + 1, ierr ) + ! produce l in a, zeroing out other entries + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = itau + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] + call stdlib_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of l and work(ivt) + ! by right singular vectors of l + ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] + ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! workspace: need m*m [vt] + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& + a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_slacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr + ! path 5t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n > m, jobz='n') + ! perform bidiagonal svd, only computing singular values + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,dum, idum, & + work( nwork ), iwork, info ) + else if( wntqo ) then + ! path 5to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m + bdspac ) then + ! work( ivt ) is m by n + call stdlib_slaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + ! il is unused; silence compile warnings + il = -1 + else + ! work( ivt ) is m by m + nwork = ivt + ldwkvt*m + il = nwork + ! work(il) is m by chunk + chunk = ( lwork - m*m - 3*m ) / m + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in work(ivt) + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac + call stdlib_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & + dum, idum,work( nwork ), iwork, info ) + ! overwrite u by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + if( lwork >= m*n + 3*m + bdspac ) then + ! path 5to-fast + ! overwrite work(ivt) by left singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] + call stdlib_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork - nwork + 1, ierr ) + ! copy right singular vectors of a from work(ivt) to a + call stdlib_slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 5to-slow + ! generate p**t in a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] + call stdlib_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork - nwork + 1, ierr ) + ! multiply q in a by right singular vectors of + ! bidiagonal matrix in work(ivt), storing result in + ! work(il) and copying to a + ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] + ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] + do i = 1, n, chunk + blk = min( n - i + 1, chunk ) + call stdlib_sgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1, & + i ), lda, zero,work( il ), m ) + call stdlib_slacpy( 'F', m, blk, work( il ), m, a( 1, i ),lda ) + end do + end if + else if( wntqs ) then + ! path 5ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_slaset( 'F', m, n, zero, zero, vt, ldvt ) + call stdlib_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + m [work] + ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + else if( wntqa ) then + ! path 5ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in u and computing right singular + ! vectors of bidiagonal matrix in vt + ! workspace: need 3*m [e, tauq, taup] + bdspac + call stdlib_slaset( 'F', n, n, zero, zero, vt, ldvt ) + call stdlib_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & + work( nwork ), iwork,info ) + ! set the right corner of vt to identity matrix + if( n>m ) then + call stdlib_slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) + end if + ! overwrite u by left singular vectors of a and vt + ! by right singular vectors of a + ! workspace: need 3*m [e, tauq, taup] + n [work] + ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] + call stdlib_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork - nwork + 1, ierr ) + call stdlib_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork - nwork + 1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( anrm SGESVD: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + !> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**T, not V. + + subroutine stdlib_sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m, lwork_sgebrd, & + lwork_sorgbr_p, lwork_sorgbr_q, lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m + real(sp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(sp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! compute space needed for stdlib_sbdsqr + mnthr = stdlib_ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*n + ! compute space needed for stdlib_sgeqrf + call stdlib_sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_sgeqrf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorgqr + call stdlib_sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_sorgqr_n = int( dum(1),KIND=ilp) + call stdlib_sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_sorgqr_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sgebrd + call stdlib_sgebrd( n, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_sgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorgbr p + call stdlib_sorgbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorgbr q + call stdlib_sorgbr( 'Q', n, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_q = int( dum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_sgeqrf + maxwrk = max( maxwrk, 3*n+lwork_sgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*n, bdspac ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_n ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n+wrkbl, n*n+m*n+n ) + minwrk = max( 3*n+m, bdspac ) + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_n ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( n*n+wrkbl, n*n+m*n+n ) + minwrk = max( 3*n+m, bdspac ) + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_n ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_n ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_n ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_m ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_m ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_sgeqrf + wrkbl = max( wrkbl, n+lwork_sorgqr_m ) + wrkbl = max( wrkbl, 3*n+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q ) + wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = n*n + wrkbl + minwrk = max( 3*n+m, bdspac ) + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_sgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_sgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*n + lwork_sgebrd + if( wntus .or. wntuo ) then + call stdlib_sorgbr( 'Q', m, n, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q ) + end if + if( wntua ) then + call stdlib_sorgbr( 'Q', m, m, n, a, lda, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_q = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*n+m, bdspac ) + end if + else if( minmn>0 ) then + ! compute space needed for stdlib_sbdsqr + mnthr = stdlib_ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 ) + bdspac = 5*m + ! compute space needed for stdlib_sgelqf + call stdlib_sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr ) + lwork_sgelqf = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorglq + call stdlib_sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr ) + lwork_sorglq_n = int( dum(1),KIND=ilp) + call stdlib_sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr ) + lwork_sorglq_m = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sgebrd + call stdlib_sgebrd( m, m, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_sgebrd = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorgbr p + call stdlib_sorgbr( 'P', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_p = int( dum(1),KIND=ilp) + ! compute space needed for stdlib_sorgbr q + call stdlib_sorgbr( 'Q', m, m, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_q = int( dum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_sgelqf + maxwrk = max( maxwrk, 3*m+lwork_sgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q ) + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 4*m, bdspac ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_m ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m+wrkbl, m*m+m*n+m ) + minwrk = max( 3*m+n, bdspac ) + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_m ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = max( m*m+wrkbl, m*m+m*n+m ) + minwrk = max( 3*m+n, bdspac ) + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_m ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_m ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + maxwrk = max( maxwrk, minwrk ) + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_m ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_n ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_n ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = 2*m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_sgelqf + wrkbl = max( wrkbl, m+lwork_sorglq_n ) + wrkbl = max( wrkbl, 3*m+lwork_sgebrd ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p ) + wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q ) + wrkbl = max( wrkbl, bdspac ) + maxwrk = m*m + wrkbl + minwrk = max( 3*m+n, bdspac ) + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_sgebrd( m, n, a, lda, s, dum(1), dum(1),dum(1), dum(1), -1, ierr ) + + lwork_sgebrd = int( dum(1),KIND=ilp) + maxwrk = 3*m + lwork_sgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_sorgbr p + call stdlib_sorgbr( 'P', m, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p ) + end if + if( wntva ) then + call stdlib_sorgbr( 'P', n, n, m, a, n, dum(1),dum(1), -1, ierr ) + lwork_sorgbr_p = int( dum(1),KIND=ilp) + maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q ) + end if + maxwrk = max( maxwrk, bdspac ) + minwrk = max( 3*m+n, bdspac ) + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + iwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_slacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) + + ! generate q in a + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n+2*n, prefer n*n+m*n+n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (workspace: need 4*n, prefer 3*n+n*nb) + call stdlib_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,a, lda, dum, 1, & + work( iwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n-n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_slaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need n*n+4*n-1, prefer n*n+3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + iu = ie + n + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (workspace: need n*n+2*n, prefer n*n+m*n+n) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),lda, work( ir )& + , ldwrkr, zero,work( iu ), ldwrku ) + call stdlib_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_slaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,a, lda, dum, & + 1, work( iwork ), info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n+4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n+4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n+bdspac) + call stdlib_sbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + ! copy right singular vectors of r to a + ! (workspace: need n*n) + call stdlib_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n+4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & + zero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_slaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,1, work( ir ), & + ldwrkr, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_slacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n+m, prefer n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,1, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*n*n+4*n, + ! prefer 2*n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need 2*n*n+4*n-1, + ! prefer 2*n*n+3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (workspace: need 2*n*n+bdspac) + call stdlib_sbdsqr( 'U', n, n, n, 0, s, work( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, dum, 1, work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_slacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n+m, prefer n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_slaset( 'L', n-1, n-1, zero, zero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,lda, u, ldu, dum, & + 1, work( iwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 4*n, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) + + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) + call stdlib_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) + call stdlib_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need n*n+4*n-1, + ! prefer n*n+3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (workspace: need n*n+bdspac) + call stdlib_sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,ldvt, work( iu ),& + ldwrku, dum, 1,work( iwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (workspace: need n*n) + call stdlib_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & + zero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_slacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (workspace: need 2*n, prefer n+n*nb) + call stdlib_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (workspace: need n+m, prefer n+m*nb) + call stdlib_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_slacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_slaset( 'L', n-1, n-1, zero, zero,vt( 2, 1 ), ldvt & + ) + ie = itau + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (workspace: need 4*n, prefer 3*n+2*n*nb) + call stdlib_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (workspace: need 3*n+m, prefer 3*n+m*nb) + call stdlib_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + iwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = ie + n + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 3*n+ncu, prefer 3*n+ncu*nb) + call stdlib_slacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_slacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*n, prefer 3*n+n*nb) + call stdlib_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) + call stdlib_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda ) + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + iwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,lda, dum, 1, & + work( iwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_slacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, dum,& + 1, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m+2*m, prefer m*m+m*n+m) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,dum, 1, dum, 1, & + work( iwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m-m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_slacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, m, 0, s, work( ie ),work( ir ), ldwrkr, u, & + ldu, dum, 1,work( iwork ), info ) + iu = ie + m + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (workspace: need m*m+2*m, prefer m*m+m*n+m)) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1, i & + ), lda, zero,work( iu ), ldwrku ) + call stdlib_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ! generate q in a + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & + ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,u, ldu, dum, 1, & + work( iwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in a + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m+4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m+4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (workspace: need m*m) + call stdlib_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, compute left + ! singular vectors of a in a and compute right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ! generate q in a + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m+4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & + zero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n+m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & + ) + ! generate q in vt + ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (workspace: need m*m+4*m-1, + ! prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, 0, 0, s, work( ie ),work( ir ), ldwrkr, & + dum, 1, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_slacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m+n, prefer m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,ldvt, dum, 1, & + dum, 1, work( iwork ),info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n+m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (workspace: need 2*m*m+4*m, + ! prefer 2*m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need 2*m*m+4*m-1, + ! prefer 2*m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (workspace: need 2*m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, dum, 1, work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_slacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m+n, prefer m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),lda ) + ! bidiagonalize l in a + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, a, lda, & + dum, 1, work( iwork ),info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n+m, 4*m, bdspac ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & + ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + call stdlib_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & + ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) + call stdlib_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (workspace: need m*m+bdspac) + call stdlib_sbdsqr( 'U', m, m, m, 0, s, work( ie ),work( iu ), ldwrku, & + u, ldu, dum, 1,work( iwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (workspace: need m*m) + call stdlib_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & + zero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_slacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (workspace: need 2*m, prefer m+m*nb) + call stdlib_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (workspace: need m+n, prefer m+n*nb) + call stdlib_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_slacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),ldu ) + ie = itau + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (workspace: need 4*m, prefer 3*m+2*m*nb) + call stdlib_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (workspace: need 3*m+n, prefer 3*m+n*nb) + call stdlib_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + iwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,ldvt, u, ldu, & + dum, 1, work( iwork ),info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = ie + m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) + call stdlib_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) + call stdlib_slacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (workspace: need 3*m+nrvt, prefer 3*m+nrvt*nb) + call stdlib_slacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) + call stdlib_sorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (workspace: need 4*m, prefer 3*m+m*nb) + call stdlib_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + iwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, u, ldu, dum,& + 1, work( iwork ), info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,u, ldu, dum, & + 1, work( iwork ), info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (workspace: need bdspac) + call stdlib_sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,ldvt, a, lda, dum,& + 1, work( iwork ), info ) + end if + end if + end if + ! if stdlib_sbdsqr failed to converge, copy unconverged superdiagonals + ! to work( 2:minmn ) + if( info/=0 ) then + if( ie>2 ) then + do i = 1, minmn - 1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + if( ie<2 ) then + do i = minmn - 1, 1, -1 + work( i+1 ) = work( i+ie-1 ) + end do + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_slascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1, work( 2 ),minmn, ierr ) + if( anrm SGESVDQ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_sgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lwork + ! Array Arguments + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: u(ldu,*), v(ldv,*), work(*) + real(sp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: ierr, iwoff, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_sgelqf, lwrk_sgesvd, lwrk_sgesvd2, lwrk_sgeqp3, & + lwrk_sgeqrf, lwrk_sormlq, lwrk_sormqr, lwrk_sormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lworq, lworq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(sp) :: big, epsln, rtmp, sconda, sfmin + ! Local Arrays + real(sp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! Executable Statements + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + if ( conda ) then + iminwrk = max( 1, n + m - 1 + n ) + else + iminwrk = max( 1, n + m - 1 ) + end if + rminwrk = max( 2, m ) + else + if ( conda ) then + iminwrk = max( 1, n + n ) + else + iminwrk = max( 1, n ) + end if + rminwrk = 2 + end if + lquery = (liwork == -1 .or. lwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=sp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_slascl('G',0,0,sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_slaswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_slange( 'M', m, n, a, lda, rdummy ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = -8 + call stdlib_xerbla( 'SGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=sp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_slascl('G',0,0, sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_sgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_slamch('E') + sfmin = stdlib_slamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=sp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_slamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_slacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_snrm2( p, v(1,p), 1 ) + call stdlib_sscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_spocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & + ) + else + call stdlib_spocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & + ierr ) + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**t = [a](1:nr,1:n)**t + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + do q = p + 1, n + a(q,p) = a(p,q) + if ( q <= nr ) a(p,q) = zero + end do + end do + call stdlib_sgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_slaset( 'L', nr-1,nr-1, zero,zero, a(2,1), lda ) + call stdlib_sgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & + ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_sgesvd to r**t + ! .. copy r**t into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_slaset( 'U', nr-1,nr-1, zero,zero, u(1,2), ldu ) + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_sgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + else + ! Apply Stdlib_Sgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_slacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_slaset( 'L', nr-1, nr-1, zero, zero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_sgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_slaset('A', m-nr, nr, zero, zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_slaset( 'A',nr,n1-nr,zero,zero,u(1,nr+1), ldu ) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_sormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_sgesvd to r**t + ! .. copy r**t into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = (a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_sgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_slapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_slaset('G', n, n-nr, zero, zero, v(1,nr+1), ldv) + call stdlib_sgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Sgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_slacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_slaset( 'L', nr-1, nr-1, zero, zero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_sgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_slapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_slaset('G', n-nr, n, zero,zero, v(nr+1,1), ldv) + call stdlib_sgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the transposed matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_sgesvd to r**t [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv ) + ! .. the left singular vectors of r**t overwrite [v], the nr right + ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed + call stdlib_sgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + ! Assemble V + do p = 1, nr + do q = p + 1, nr + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = v(q,p) + end do + end do + end if + call stdlib_slapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + do q = p + 1, nr + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**t into [v] and overwrite [v] with the left singular + ! vectors of r**t + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'sgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_slaset('U',nr-1,nr-1, zero,zero, v(1,2),ldv) + + call stdlib_slaset('A',n,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_sgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & + lwork-n, info ) + do p = 1, n + do q = p + 1, n + rtmp = v(q,p) + v(q,p) = v(p,q) + v(p,q) = rtmp + end do + end do + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + do q = p + 1, n + rtmp = u(q,p) + u(q,p) = u(p,q) + u(p,q) = rtmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_slaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_slaset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**t into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = a(p,q) + end do + end do + if ( nr > 1 )call stdlib_slaset('U',nr-1,nr-1,zero,zero,u(1,nr+2),ldu) + + call stdlib_sgeqrf( n, nr, u(1,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& + n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = u(p,nr+q) + end do + end do + call stdlib_slaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& + ,lwork-n-nr, info ) + call stdlib_slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_sormqr('R','C', n, n, nr, u(1,nr+1), ldu,work(n+1),v,ldv,work(& + n+nr+1),lwork-n-nr,ierr) + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_sgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_slacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_slaset( 'L', nr-1,nr-1, zero,zero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_sgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_slapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'sgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_slacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_slaset('L', nr-1,nr-1, zero,zero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_slaset('A', n-nr,n, zero,zero, v(nr+1,1),ldv) + call stdlib_sgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & + lwork-n, info ) + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the transposed matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A',m-n,n,zero,zero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_slaset('A',n,n1-n,zero,zero,u(1,n+1),ldu) + call stdlib_slaset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_slacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_slaset('L',nr-1,nr-1,zero,zero,u(nr+2,1),ldu) + + call stdlib_sgelqf( nr, n, u(nr+1,1), ldu, work(n+1),work(n+nr+1), lwork-n-& + nr, ierr ) + call stdlib_slacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_slaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv) + call stdlib_sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& + 1), lwork-n-nr, info ) + call stdlib_slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv) + call stdlib_slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv) + call stdlib_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) + call stdlib_sormlq('R','N',n,n,nr,u(nr+1,1),ldu,work(n+1),v, ldv, work(n+& + nr+1),lwork-n-nr,ierr) + call stdlib_slapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_slaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu) + call stdlib_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) + end if + end if + end if + end if + ! .. end of the "r**t or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_sormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& + n+1), lwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_slaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_slaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_slascl( 'G',0,0, one,sqrt(real(m,KIND=sp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_sgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_sgesvdq + + !> SGESVJ: computes the singular value decomposition (SVD) of a real + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + !> SGESVJ can sometimes compute tiny singular values and their singular vectors much + !> more accurately than other SVD routines, see below under Further Details. + + pure subroutine stdlib_sgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) + real(sp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + ! Local Scalars + real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & + temp1, theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Local Arrays + real(sp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,min,float,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = work( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=sp) ) + else + ctol = real( m,KIND=sp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_slamch() works properly on the target machine.) + epsln = stdlib_slamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_slamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + big = stdlib_slamch( 'OVERFLOW' ) + ! big = one / sfmin + rootbig = one / rootsfmin + large = big / sqrt( real( m*n,KIND=sp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=sp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'SGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_slaset( 'A', mvl, n, zero, one, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl = one / sqrt( real( m,KIND=sp)*real( n,KIND=sp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_slassq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'SGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_slassq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'SGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_slassq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'SGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl = one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_slaset( 'G', m, n, zero, one, a, lda ) + work( 1 ) = one + work( 2 ) = zero + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_slascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + work( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + work( 2 ) = one + else + work( 2 ) = zero + end if + work( 3 ) = zero + work( 4 ) = zero + work( 5 ) = zero + work( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=sp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=sp) ) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=sp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_slascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl = temp1*skl + if( skl/=one ) then + call stdlib_slascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl = one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + fastr( 1 ) = zero + ! a is represented in factored form a = a * diag(work), where diag(work) + ! is initialized to identity. work is updated during fast scaled + ! rotations. + do q = 1, n + work( q ) = one + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_sgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_sgesvj. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_sgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, work( n+1 ), & + lwork-n, ierr ) + call stdlib_sgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,work( n+1 ), lwork-n, & + ierr ) + call stdlib_sgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_sgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, & + ierr ) + call stdlib_sgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, work( n+1 ), lwork-n,ierr ) + call stdlib_sgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& + tol, 1, work( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_sgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, work( n+1 ), lwork-n,ierr ) + call stdlib_sgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, work( n+1 ), lwork-n,ierr ) + + call stdlib_sgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, work( n+1 ),lwork-n, ierr ) + call stdlib_sgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& + mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,work( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_sswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_sswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! caveat: + ! unfortunately, some blas implementations compute stdlib_snrm2(m,a(1,p),1) + ! as sqrt(stdlib_sdot(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_snrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_snrm2 is available, the if-then-else + ! below should read "aapp = stdlib_snrm2( m, a(1,p), 1 ) * work(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_snrm2( m, a( 1, p ), 1 )*work( p ) + else + temp1 = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*work( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_scopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_slascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_sdot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_scopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_slascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_sdot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq ) / aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( work( q )>=one ) then + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_saxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( work( p )>=work( q ) )then + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_saxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_scopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_slascl( 'G', 0, 0, aapp, one, m,1, work( n+1 ), & + lda,ierr ) + call stdlib_slascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_saxpy( m, temp1, work( n+1 ), 1,a( 1, q ), 1 ) + + call stdlib_slascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_snrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_slassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_snrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_scopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + call stdlib_slascl( 'G', 0, 0, aapp,work( p ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_sdot( m, work( n+1 ), 1,a( 1, q ), 1 )*work( & + q ) / aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*work( & + p )*work( q ) /aaqq ) / aapp + else + call stdlib_scopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + call stdlib_slascl( 'G', 0, 0, aaqq,work( q ), m, 1,work( n+& + 1 ), lda, ierr ) + aapq = stdlib_sdot( m, work( n+1 ), 1,a( 1, p ), 1 )*work( & + p ) / aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq ) / aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*work( p ) / work( q ) + fastr( 4 ) = -t*work( q ) /work( p ) + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = work( p ) / work( q ) + aqoap = work( q ) / work( p ) + if( work( p )>=one ) then + if( work( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + work( p ) = work( p )*cs + work( q ) = work( q )*cs + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + end if + else + if( work( q )>=one ) then + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_saxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + else + if( work( p )>=work( q ) )then + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + work( p ) = work( p )*cs + work( q ) = work( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_saxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + work( p ) = work( p ) / cs + work( q ) = work( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_scopy( m, a( 1, p ), 1,work( n+1 ), 1 ) + + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*work( p ) / work( q ) + call stdlib_saxpy( m, temp1, work( n+1 ),1, a( 1, q ), 1 & + ) + call stdlib_slascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_scopy( m, a( 1, q ), 1,work( n+1 ), 1 ) + + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, work( n+1 & + ), lda,ierr ) + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*work( q ) / work( p ) + call stdlib_saxpy( m, temp1, work( n+1 ),1, a( 1, p ), 1 & + ) + call stdlib_slascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_snrm2( m, a( 1, q ), 1 )*work( q ) + + else + t = zero + aaqq = one + call stdlib_slassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*work( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_snrm2( m, a( 1, p ), 1 )*work( p ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*work( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_snrm2( m, a( 1, n ), 1 )*work( n ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*work( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = work( p ) + work( p ) = work( q ) + work( q ) = temp1 + call stdlib_sswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n2 + call stdlib_sscal( m, work( p ) / sva( p ), a( 1, p ), 1 ) + end do + end if + ! scale the product of jacobi rotations (assemble the fast rotations). + if( rsvec ) then + if( applv ) then + do p = 1, n + call stdlib_sscal( mvl, work( p ), v( 1, p ), 1 ) + end do + else + do p = 1, n + temp1 = one / stdlib_snrm2( mvl, v( 1, p ), 1 ) + call stdlib_sscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl ) ) ).or. ( ( skl( sfmin / skl ) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl = one + end if + work( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + work( 2 ) = real( n4,KIND=sp) + ! n4 is the number of computed nonzero singular values of a. + work( 3 ) = real( n2,KIND=sp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 SGGES3: computes for a pair of N-by-N real nonsymmetric matrices (A,B), + !> the generalized eigenvalues, the generalized real Schur form (S,T), + !> optionally, the left and/or right matrices of Schur vectors (VSL and + !> VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> quasi-triangular matrix S and the upper triangular matrix T.The + !> leading columns of VSL and VSR then form an orthonormal basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> SGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or both being zero. + !> A pair of matrices (S,T) is in generalized real Schur form if T is + !> upper triangular with non-negative diagonal and S is block upper + !> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond + !> to real generalized eigenvalues, while 2-by-2 blocks of S will be + !> "standardized" by making the corresponding elements of T have the + !> form: + !> [ a 0 ] + !> [ 0 b ] + !> and the pair of corresponding 2-by-2 blocks in S and T will have a + !> complex conjugate pair of generalized eigenvalues. + + subroutine stdlib_sgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alphar, & + alphai, beta, vsl, ldvsl,vsr, ldvsr, work, lwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), & + work(*) + ! Function Arguments + procedure(stdlib_selctg_s) :: selctg + ! ===================================================================== + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, lst2sl, & + wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, ip, iright, irows, & + itau, iwrk, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, safmax, safmin, & + smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(sp) :: dif(2) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_sggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = iwrk + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_sorgqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_slaset( 'FULL', n, n, zero, one, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_sgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk, ierr ) + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_slaqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vsl, ldvsl, vsr, ldvsr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 40 + end if + ! sort eigenvalues alpha/beta if desired + sdim = 0 + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl ) then + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n,ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n,ierr ) + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alphar( i ), alphai( i ), beta( i ) ) + end do + call stdlib_stgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alphar,alphai, beta, & + vsl, ldvsl, vsr, ldvsr, sdim, pvsl,pvsr, dif, work( iwrk ), lwork-iwrk+1, idum, 1,& + ierr ) + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_sggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_sggbak( 'P', 'R', n, ilo, ihi, work( ileft ),work( iright ), n, & + vsr, ldvsr, ierr ) + ! check if unscaling would cause over/underflow, if so, rescale + ! (alphar(i),alphai(i),beta(i)) so beta(i) is on the order of + ! b(i,i) and alphar(i) and alphai(i) are on the order of a(i,i) + if( ilascl )then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( alphar( i )/safmax )>( anrmto/anrm ) .or.( safmin/alphar( i ) )>( & + anrm/anrmto ) ) then + work( 1 ) = abs( a( i, i )/alphar( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + else if( ( alphai( i )/safmax )>( anrmto/anrm ) .or.( safmin/alphai( i ) )>( & + anrm/anrmto ) ) then + work( 1 ) = abs( a( i, i+1 )/alphai( i ) ) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + if( ilbscl )then + do i = 1, n + if( alphai( i )/=zero ) then + if( ( beta( i )/safmax )>( bnrmto/bnrm ) .or.( safmin/beta( i ) )>( & + bnrm/bnrmto ) ) then + work( 1 ) = abs(b( i, i )/beta( i )) + beta( i ) = beta( i )*work( 1 ) + alphar( i ) = alphar( i )*work( 1 ) + alphai( i ) = alphai( i )*work( 1 ) + end if + end if + end do + end if + ! undo scaling + if( ilascl ) then + call stdlib_slascl( 'H', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphar, n, ierr ) + call stdlib_slascl( 'G', 0, 0, anrmto, anrm, n, 1, alphai, n, ierr ) + end if + if( ilbscl ) then + call stdlib_slascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_slascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + lst2sl = .true. + sdim = 0 + ip = 0 + do i = 1, n + cursl = selctg( alphar( i ), alphai( i ), beta( i ) ) + if( alphai( i )==zero ) then + if( cursl )sdim = sdim + 1 + ip = 0 + if( cursl .and. .not.lastsl )info = n + 2 + else + if( ip==1 ) then + ! last eigenvalue of conjugate pair + cursl = cursl .or. lastsl + lastsl = cursl + if( cursl )sdim = sdim + 2 + ip = -1 + if( cursl .and. .not.lst2sl )info = n + 2 + else + ! first eigenvalue of conjugate pair + ip = 1 + end if + end if + lst2sl = lastsl + lastsl = cursl + end do + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_sgges3 + + !> SGGEV3: computes for a pair of N-by-N real nonsymmetric matrices (A,B) + !> the generalized eigenvalues, and optionally, the left and/or right + !> generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B . + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_sggev3( jobvl, jobvr, n, a, lda, b, ldb, alphar,alphai, beta, vl, ldvl, vr,& + ldvr, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: alphai(*), alphar(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, itau, & + iwrk, jc, jr, lwkopt + real(sp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_slascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_slange( 'M', n, n, b, ldb, work ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_slascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + iwrk = iright + n + call stdlib_sggbal( 'P', n, a, lda, b, ldb, ilo, ihi, work( ileft ),work( iright ), & + work( iwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = iwrk + iwrk = itau + irows + call stdlib_sgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_sormqr( 'L', 'T', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_slaset( 'FULL', n, n, zero, one, vl, ldvl ) + if( irows>1 ) then + call stdlib_slacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_sorgqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_slaset( 'FULL', n, n, zero, one, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_sgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk, ierr ) + else + call stdlib_sgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_slaqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & + beta, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 110 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_stgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 110 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_sggbak( 'P', 'L', n, ilo, ihi, work( ileft ),work( iright ), n, vl, & + ldvl, ierr ) + loop_50: do jc = 1, n + if( alphai( jc ) SGSVJ0: is called from SGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_sgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(sp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) + real(sp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(sp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,float,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_snrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_snrm2 is available, the if-then-else + ! below should read "aapp = stdlib_snrm2( m, a(1,p), 1 ) * d(p)". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_snrm2( m, a( 1, p ), 1 )*d( p ) + else + temp1 = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp )*d( p ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + ! Rotate + ! rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq ) / aapq + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + end if + else + if( d( q )>=one ) then + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_saxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + else + if( d( p )>=d( q ) ) then + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_saxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_scopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aapp, one, m,1, work, lda, & + ierr ) + call stdlib_slascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_saxpy( m, temp1, work, 1,a( 1, q ), 1 ) + call stdlib_slascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_snrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_slassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_snrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ........................................................ + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! Safe Gram Matrix Computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq ) / aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_saxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_saxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_scopy( m, a( 1, p ), 1, work,1 ) + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_saxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_slascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_scopy( m, a( 1, q ), 1, work,1 ) + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_saxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_slascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_snrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_slassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_snrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_snrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of iterations. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given tolerance, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d. + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_sswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_sgsvj0 + + !> SGSVJ1: is called from SGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> SGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_sgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(sp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + real(sp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) + real(sp), intent(out) :: work(lwork) + ! ===================================================================== + + ! Local Scalars + real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & + mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & + thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Local Arrays + real(sp) :: fastr(5) + ! Intrinsic Functions + intrinsic :: abs,max,float,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! Safe Gram Matrix Computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, p ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aapp, d( p ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, q ),1 )*d( q ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_sdot( m, a( 1, p ), 1, a( 1,q ), 1 )*d( p )& + *d( q ) / aaqq )/ aapp + else + call stdlib_scopy( m, a( 1, q ), 1, work, 1 ) + call stdlib_slascl( 'G', 0, 0, aaqq, d( q ),m, 1, work, lda,& + ierr ) + aapq = stdlib_sdot( m, work, 1, a( 1, p ),1 )*d( p ) / & + aapp + end if + end if + mxaapq = max( mxaapq, abs( aapq ) ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq )>tol ) then + notrot = 0 + ! rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq ) / aapq + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + fastr( 3 ) = t*d( p ) / d( q ) + fastr( 4 ) = -t*d( q ) / d( p ) + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1, fastr ) + + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1,v( 1, q ),& + 1,fastr ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) + apoaq = d( p ) / d( q ) + aqoap = d( q ) / d( p ) + if( d( p )>=one ) then + if( d( q )>=one ) then + fastr( 3 ) = t*apoaq + fastr( 4 ) = -t*aqoap + d( p ) = d( p )*cs + d( q ) = d( q )*cs + call stdlib_srotm( m, a( 1, p ), 1,a( 1, q ), 1,& + fastr ) + if( rsvec )call stdlib_srotm( mvl,v( 1, p ), 1, v( & + 1, q ),1, fastr ) + else + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( 1, & + p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,a( & + 1, q ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, -t*aqoap,v( 1, q ), 1,v(& + 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ), 1,& + v( 1, q ), 1 ) + end if + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + end if + else + if( d( q )>=one ) then + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1, q & + ), 1 ) + call stdlib_saxpy( m, -cs*sn*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + if( rsvec ) then + call stdlib_saxpy( mvl, t*apoaq,v( 1, p ), 1,v( & + 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q ), & + 1,v( 1, p ), 1 ) + end if + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + else + if( d( p )>=d( q ) ) then + call stdlib_saxpy( m, -t*aqoap,a( 1, q ), 1,a( & + 1, p ), 1 ) + call stdlib_saxpy( m, cs*sn*apoaq,a( 1, p ), 1,& + a( 1, q ), 1 ) + d( p ) = d( p )*cs + d( q ) = d( q ) / cs + if( rsvec ) then + call stdlib_saxpy( mvl,-t*aqoap,v( 1, q ), 1,& + v( 1, p ), 1 ) + call stdlib_saxpy( mvl,cs*sn*apoaq,v( 1, p ),& + 1,v( 1, q ), 1 ) + end if + else + call stdlib_saxpy( m, t*apoaq,a( 1, p ), 1,a( 1,& + q ), 1 ) + call stdlib_saxpy( m,-cs*sn*aqoap,a( 1, q ), 1,& + a( 1, p ), 1 ) + d( p ) = d( p ) / cs + d( q ) = d( q )*cs + if( rsvec ) then + call stdlib_saxpy( mvl,t*apoaq, v( 1, p ),1, & + v( 1, q ), 1 ) + call stdlib_saxpy( mvl,-cs*sn*aqoap,v( 1, q )& + , 1,v( 1, p ), 1 ) + end if + end if + end if + end if + end if + else + if( aapp>aaqq ) then + call stdlib_scopy( m, a( 1, p ), 1, work,1 ) + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, work, lda,& + ierr ) + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + temp1 = -aapq*d( p ) / d( q ) + call stdlib_saxpy( m, temp1, work, 1,a( 1, q ), 1 ) + + call stdlib_slascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_scopy( m, a( 1, q ), 1, work,1 ) + call stdlib_slascl( 'G', 0, 0, aaqq, one,m, 1, work, lda,& + ierr ) + call stdlib_slascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + temp1 = -aapq*d( q ) / d( p ) + call stdlib_saxpy( m, temp1, work, 1,a( 1, p ), 1 ) + + call stdlib_slascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q) + ! .. recompute sva(q) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_snrm2( m, a( 1, q ), 1 )*d( q ) + else + t = zero + aaqq = one + call stdlib_slassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq )*d( q ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_snrm2( m, a( 1, p ), 1 )*d( p ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp )*d( p ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + ! if ( notrot >= emptsw ) go to 2011 + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapp= emptsw ) go to 2011 + end if + end do loop_2100 + ! end of the p-loop + end do loop_2010 + ! end of the jbc-loop + 2011 continue + ! 2011 bailed out of the jbc-loop + do p = igl, min( igl+kbl-1, n ) + sva( p ) = abs( sva( p ) ) + end do + ! ** if ( notrot >= emptsw ) go to 1994 + end do loop_2000 + ! 2000 :: end of the ibr-loop + ! .. update sva(n) + if( ( sva( n )rootsfmin ) )then + sva( n ) = stdlib_snrm2( m, a( 1, n ), 1 )*d( n ) + else + t = zero + aapp = one + call stdlib_slassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp )*d( n ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:) reaching this point means that the procedure has completed the given + ! number of sweeps. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means that during the i-th sweep all pivots were + ! below the given threshold, causing early exit. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector d + do p = 1, n - 1 + q = stdlib_isamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + temp1 = d( p ) + d( p ) = d( q ) + d( q ) = temp1 + call stdlib_sswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_sgsvj1 + + !> SHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_shseqr( job, compz, n, ilo, ihi, h, ldh, wr, wi, z,ldz, work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: wi(*), work(*), wr(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_slahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_slahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + ! Local Arrays + real(sp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: i, kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: max,min,real + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = real( max( 1, n ),KIND=sp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldhnmin ) then + call stdlib_slaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + work, lwork, info ) + else + ! ==== small matrix ==== + call stdlib_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo,ihi, z, ldz, & + info ) + if( info>0 ) then + ! ==== a rare stdlib_slahqr failure! stdlib_slaqr0 sometimes succeeds + ! . when stdlib_slahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_slaqr0 directly. ==== + call stdlib_slaqr0( wantt, wantz, n, ilo, kbot, h, ldh, wr,wi, ilo, ihi, z,& + ldz, work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_slaqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_slaqr0. ==== + call stdlib_slacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = zero + call stdlib_slaset( 'A', nl, nl-n, zero, zero, hl( 1, n+1 ),nl ) + call stdlib_slaqr0( wantt, wantz, nl, ilo, kbot, hl, nl, wr,wi, ilo, ihi, & + z, ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_slacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_slaset( 'L', n-2, n-2, zero, zero,& + h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = max( real( max( 1, n ),KIND=sp), work( 1 ) ) + end if + end subroutine stdlib_shseqr + + !> SLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, SLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by SLALSA. + + pure subroutine stdlib_slalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, work,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: b(ldb,*) + real(sp), intent(out) :: bx(ldbx,*), work(*) + real(sp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, lf, ll, lvl, lvl2, nd, ndb1, ndiml, ndimr, & + nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n SLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_slalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, iwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(sp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: b(ldb,*), d(*), e(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + iwk, j, k, nlvl, nm1, nsize, nsub, nwork, perm, poles, s, sizei, smlszp, sqre, st, st1,& + u, vt, z + real(sp) :: cs, eps, orgnrm, r, rcnd, sn, tol + ! Intrinsic Functions + intrinsic :: abs,int,log,real,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_slaset( 'A', 1, nrhs, zero, zero, b, ldb ) + else + rank = 1 + call stdlib_slascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_srot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + work( i*2-1 ) = cs + work( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = work( j*2-1 ) + sn = work( j*2 ) + call stdlib_srot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_slanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_slaset( 'A', n, nrhs, zero, zero, b, ldb ) + return + end if + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + nwork = 1 + n*n + call stdlib_slaset( 'A', n, n, zero, one, work, n ) + call stdlib_slasdq( 'U', 0, n, n, 0, nrhs, d, e, work, n, work, n, b,ldb, work( & + nwork ), info ) + if( info/=0 ) then + return + end if + tol = rcnd*abs( d( stdlib_isamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_slaset( 'A', 1, nrhs, zero, zero, b( i, 1 ), ldb ) + else + call stdlib_slascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + call stdlib_sgemm( 'T', 'N', n, nrhs, n, one, work, n, b, ldb, zero,work( nwork ), & + n ) + call stdlib_slacpy( 'A', n, nrhs, work( nwork ), n, b, ldb ) + ! unscale. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_slasrt( 'D', n, d, info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=sp) / real( smlsiz+1,KIND=sp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + bx = givnum + 2*nlvl*n + nwork = bx + n*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_scopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_scopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_slasdq. + call stdlib_slaset( 'A', nsize, nsize, zero, one,work( vt+st1 ), n ) + call stdlib_slasdq( 'U', 0, nsize, nsize, 0, nrhs, d( st ),e( st ), work( vt+& + st1 ), n, work( nwork ),n, b( st, 1 ), ldb, work( nwork ), info ) + if( info/=0 ) then + return + end if + call stdlib_slacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_slasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), work( u+st1 & + ), n, work( vt+st1 ),iwork( k+st1 ), work( difl+st1 ),work( difr+st1 ), work( & + z+st1 ),work( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), n, iwork( & + perm+st1 ),work( givnum+st1 ), work( c+st1 ),work( s+st1 ), work( nwork ), & + iwork( iwk ),info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_slalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( & + difr+st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+& + st1 ), work( nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_60 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_isamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_slaset( 'A', 1, nrhs, zero, zero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_slascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_scopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + call stdlib_sgemm( 'T', 'N', nsize, nrhs, nsize, one,work( vt+st1 ), n, work( & + bxst ), n, zero,b( st, 1 ), ldb ) + else + call stdlib_slalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + work( u+st1 ), n,work( vt+st1 ), iwork( k+st1 ),work( difl+st1 ), work( difr+& + st1 ),work( z+st1 ), work( poles+st1 ),iwork( givptr+st1 ), iwork( givcol+st1 ),& + n,iwork( perm+st1 ), work( givnum+st1 ),work( c+st1 ), work( s+st1 ), work( & + nwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do + ! unscale and sort the singular values. + call stdlib_slascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_slasrt( 'D', n, d, info ) + call stdlib_slascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_slalsd + + !> SLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_slaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(sp), parameter :: wilk1 = 0.75_sp + real(sp), parameter :: wilk2 = -0.4375_sp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_slahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(sp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,int,max,min,mod,real + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_slahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_slaqr3 ==== + call stdlib_slaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_slaqr5, stdlib_slaqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=sp) + return + end if + ! ==== stdlib_slahqr/stdlib_slaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_slaqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_slaqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_slaqr4 or + ! . stdlib_slahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_slaqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, work,lwork, inf ) + else + call stdlib_slahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( & + ks ),wi( ks ), 1, 1, zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=sp) + end subroutine stdlib_slaqr0 + + !> Aggressive early deflation: + !> SLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an orthogonal similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an orthogonal similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + subroutine stdlib_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& + sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + + ! Local Scalars + real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & + tau, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & + ltop, lwk1, lwk2, lwk3, lwkopt, nmin + logical(lk) :: bulge, sorted + ! Intrinsic Functions + intrinsic :: abs,int,max,min,real,sqrt + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_sgehrd ==== + call stdlib_sgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_sormhr ==== + call stdlib_sormhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_slaqr4 ==== + call stdlib_slaqr4( .true., .true., jw, 1, jw, t, ldt, sr, si, 1, jw,v, ldv, work, -& + 1, infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=sp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = one + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one / safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = zero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sr( kwtop ) = h( kwtop, kwtop ) + si( kwtop ) = zero + ns = 1 + nd = 0 + if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero + end if + work( 1 ) = one + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_slaset( 'A', jw, jw, zero, one, v, ldv ) + nmin = stdlib_ilaenv( 12, 'SLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_slaqr4( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, work, lwork, infqr ) + else + call stdlib_slahqr( .true., .true., jw, 1, jw, t, ldt, sr( kwtop ),si( kwtop ), 1, & + jw, v, ldv, infqr ) + end if + ! ==== stdlib_strexc needs a clean margin near the diagonal ==== + do j = 1, jw - 3 + t( j+2, j ) = zero + t( j+3, j ) = zero + end do + if( jw>2 )t( jw, jw-2 ) = zero + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + 20 continue + if( ilst<=ns ) then + if( ns==1 ) then + bulge = .false. + else + bulge = t( ns, ns-1 )/=zero + end if + ! ==== small spike tip test for deflation ==== + if( .not. bulge ) then + ! ==== real eigenvalue ==== + foo = abs( t( ns, ns ) ) + if( foo==zero )foo = abs( s ) + if( abs( s*v( 1, ns ) )<=max( smlnum, ulp*foo ) ) then + ! ==== deflatable ==== + ns = ns - 1 + else + ! ==== undeflatable. move it up out of the way. + ! . (stdlib_strexc can not fail in this case.) ==== + ifst = ns + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 1 + end if + else + ! ==== complex conjugate pair ==== + foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & + ) ) + if( foo==zero )foo = abs( s ) + if( max( abs( s*v( 1, ns ) ), abs( s*v( 1, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & + then + ! ==== deflatable ==== + ns = ns - 2 + else + ! ==== undeflatable. move them up out of the way. + ! . fortunately, stdlib_strexc does the right thing with + ! . ilst in case of a rare exchange failure. ==== + ifst = ns + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + ilst = ilst + 2 + end if + end if + ! ==== end deflation detection loop ==== + go to 20 + end if + ! ==== return to hessenberg form ==== + if( ns==0 )s = zero + if( ns=evk ) then + i = k + else + sorted = .false. + ifst = i + ilst = k + call stdlib_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) + if( info==0 ) then + i = ilst + else + i = k + end if + end if + if( i==kend ) then + k = i + 1 + else if( t( i+1, i )==zero ) then + k = i + 1 + else + k = i + 2 + end if + go to 40 + end if + go to 30 + 50 continue + end if + ! ==== restore shift/eigenvalue array from t ==== + i = jw + 60 continue + if( i>=infqr+1 ) then + if( i==infqr+1 ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else if( t( i, i-1 )==zero ) then + sr( kwtop+i-1 ) = t( i, i ) + si( kwtop+i-1 ) = zero + i = i - 1 + else + aa = t( i-1, i-1 ) + cc = t( i, i-1 ) + bb = t( i-1, i ) + dd = t( i, i ) + call stdlib_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& + 1 ),si( kwtop+i-1 ), cs, sn ) + i = i - 2 + end if + go to 60 + end if + if( ns1 .and. s/=zero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_scopy( ns, v, ldv, work, 1 ) + beta = work( 1 ) + call stdlib_slarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = one + call stdlib_slaset( 'L', jw-2, jw-2, zero, zero, t( 3, 1 ), ldt ) + call stdlib_slarf( 'L', ns, jw, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_slarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_slarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_sgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*v( 1, 1 ) + call stdlib_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_scopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=zero )call stdlib_sormhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & + zero, wv, ldwv ) + call stdlib_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & + zero, t, ldt ) + call stdlib_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & + zero, wv, ldwv ) + call stdlib_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = real( lwkopt,KIND=sp) + end subroutine stdlib_slaqr3 + + !> SLAQR4: implements one level of recursion for SLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by SLAQR0 and, for large enough + !> deflation window size, it may be called by SLAQR3. This + !> subroutine is identical to SLAQR0 except that it calls SLAQR2 + !> instead of SLAQR3. + !> SLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**T, where T is an upper quasi-triangular matrix (the + !> Schur form), and Z is the orthogonal matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input orthogonal + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + subroutine stdlib_slaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + real(sp), intent(inout) :: h(ldh,*), z(ldz,*) + real(sp), intent(out) :: wi(*), work(*), wr(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(sp), parameter :: wilk1 = 0.75_sp + real(sp), parameter :: wilk2 = -0.4375_sp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_slahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constants wilk1 and wilk2 are used to form the + ! . exceptional shifts. ==== + + + ! Local Scalars + real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + real(sp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,int,max,min,mod,real + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = one + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_slahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & + ihiz, z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_slaqr2 ==== + call stdlib_slaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_slaqr5, stdlib_slaqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = real( lwkopt,KIND=sp) + return + end if + ! ==== stdlib_slahqr/stdlib_slaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_80: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, wr, wi, h( kv, 1 ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh,& + work, lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_slaqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_slaqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, max( ks+1, ktop+2 ), -2 + ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) + aa = wilk1*ss + h( i, i ) + bb = ss + cc = wilk2*ss + dd = aa + call stdlib_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & + ), cs, sn ) + end do + if( ks==ktop ) then + wr( ks+1 ) = h( ks+1, ks+1 ) + wi( ks+1 ) = zero + wr( ks ) = wr( ks+1 ) + wi( ks ) = wi( ks+1 ) + end if + else + ! ==== got ns/2 or fewer shifts? use stdlib_slahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_slahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, wr( ks & + ), wi( ks ),1, 1, zdum, 1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. ==== + if( ks>=kbot ) then + aa = h( kbot-1, kbot-1 ) + cc = h( kbot, kbot-1 ) + bb = h( kbot-1, kbot ) + dd = h( kbot, kbot ) + call stdlib_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & + kbot ),wi( kbot ), cs, sn ) + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) + ! . bubble sort keeps complex conjugate + ! . pairs together. ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( abs( wr( i ) )+abs( wi( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_80 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 90 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = real( lwkopt,KIND=sp) + end subroutine stdlib_slaqr4 + + !> SLAQZ0: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**T, B = Q1*T*Z1**T, + !> as computed by SGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**T, T = Q*P*Z**T, + !> where Q and Z are orthogonal matrices, P is an upper triangular + !> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 + !> diagonal blocks. + !> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair + !> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of + !> eigenvalues. + !> Additionally, the 2-by-2 upper triangular diagonal blocks of P + !> corresponding to 2-by-2 blocks of S are reduced to positive diagonal + !> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, + !> P(j,j) > 0, and P(j+1,j+1) > 0. + !> Optionally, the orthogonal matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> orthogonal matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the orthogonal factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Real eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_slaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & + alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(& + * ), alphai( * ), beta( * ), work( * ) + + ! local scalars + real(sp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost, i + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_shgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& + beta, q, ldq, z, ldz, work,lwork, info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_slaqz3 + nw = max( nwr, nmin ) + call stdlib_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_slaqz4 + call stdlib_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & + lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=sp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_slaset( 'FULL', n, n, zero, one, q, ldq ) + if( iwantz==3 ) call stdlib_slaset( 'FULL', n, n, zero, one, z, ldz ) + ! get machine constants + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp)/ulp ) + istart = ilo + istop = ihi + maxit = 3*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& + abs( a( istop-2,istop-2 ) ) ) ) ) then + a( istop-1, istop-2 ) = zero + istop = istop-2 + ld = 0 + eshift = zero + else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& + abs( a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = zero + istop = istop-1 + ld = 0 + eshift = zero + end if + ! check deflations at the start + if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & + ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then + a( istart+2, istart+1 ) = zero + istart = istart+2 + ld = 0 + eshift = zero + else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& + )+abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = zero + istart = istart+1 + ld = 0 + eshift = zero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = zero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + temp = zero + if( k < istop ) then + temp = temp+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + temp = temp+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_slartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = zero + call stdlib_srot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_srot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_srot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = zero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using qz_small because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_slaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2+1 ),& + nw, work( 2*nw**2+1 ), lwork-2*nw**2, rec,aed_info ) + if ( n_deflated > 0 ) then + istop = istop-n_deflated + ld = 0 + eshift = zero + end if + if ( 100*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_deflated-n_undeflated+1 + ! shuffle shifts to put double shifts in front + ! this ensures that we don't split up a double shift + do i = shiftpos, shiftpos+n_undeflated-1, 2 + if( alphai( i )/=-alphai( i+1 ) ) then + swap = alphar( i ) + alphar( i ) = alphar( i+1 ) + alphar( i+1 ) = alphar( i+2 ) + alphar( i+2 ) = swap + swap = alphai( i ) + alphai( i ) = alphai( i+1 ) + alphai( i+1 ) = alphai( i+2 ) + alphai( i+2 ) = swap + swap = beta( i ) + beta( i ) = beta( i+1 ) + beta( i+1 ) = beta( i+2 ) + beta( i+2 ) = swap + end if + end do + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) ) SLAQZ3: performs AED + + recursive subroutine stdlib_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(& + * ), alphai( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + real(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + real(sp), intent(out) :: work(*) + + ! local scalars + logical(lk) :: bulge + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, stgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(sp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = zero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_stgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & + work, -1, stgexc_info ) + lworkreq = int( work( 1 ),KIND=ilp) + call stdlib_slaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1, rec+1, qz_small_info ) + lworkreq = max( lworkreq, int( work( 1 ),KIND=ilp)+2*jw**2 ) + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'SLAQZ3', -info ) + return + end if + ! get machine constants + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_slabad( safmin, safmax ) + ulp = stdlib_slamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=sp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alphar( kwtop ) = a( kwtop, kwtop ) + alphai( kwtop ) = zero + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = zero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_slacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_slacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_slaset( 'FULL', jw, jw, zero, one, qc, ldqc ) + call stdlib_slaset( 'FULL', jw, jw, zero, one, zc, ldzc ) + call stdlib_slaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2*jw**2+1 ), lwork-2*jw**2,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_slacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_slacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == zero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + bulge = .false. + if ( kwbot-kwtop+1 >= 2 ) then + bulge = a( kwbot, kwbot-1 ) /= zero + end if + if ( bulge ) then + ! try to deflate complex conjugate eigenvalue pair + temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & + a( kwbot-1, kwbot ) ) ) + if( temp == zero )then + temp = abs( s ) + end if + if ( max( abs( s*qc( 1, kwbot-kwtop ) ), abs( s*qc( 1,kwbot-kwtop+1 ) ) ) <= & + max( smlnum,ulp*temp ) ) then + ! deflatable + kwbot = kwbot-2 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info ) + + k2 = k2+2 + end if + k = k+2 + else + ! try to deflate real eigenvalue + temp = abs( a( kwbot, kwbot ) ) + if( temp == zero ) then + temp = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info ) + + k2 = k2+1 + end if + k = k+1 + end if + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + bulge = .false. + if ( k < ihi ) then + if ( a( k+1, k ) /= zero ) then + bulge = .true. + end if + end if + if ( bulge ) then + ! 2x2 eigenvalue block + call stdlib_slag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& + alphar( k ),alphar( k+1 ), alphai( k ) ) + alphai( k+1 ) = -alphai( k ) + k = k+2 + else + ! 1x1 eigenvalue block + alphar( k ) = a( k, k ) + alphai( k ) = zero + beta( k ) = b( k, k ) + k = k+1 + end if + end do + if ( kwtop /= ilo .and. s /= zero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1,1:jw-nd ) + do k = kwbot-1, kwtop, -1 + call stdlib_slartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = zero + k2 = max( kwtop, k-1 ) + call stdlib_srot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_srot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_srot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, s1 ) + + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then + ! move double pole block down and remove it + do k2 = k-1, kwbot-2 + call stdlib_slaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& + ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) + end do + k = k-2 + else + ! k points to single shift + do k2 = k, kwbot-2 + ! move shift down + call stdlib_slartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) + b( k2+1, k2+1 ) = temp + b( k2+1, k2 ) = zero + call stdlib_srot( k2+2-istartm+1, a( istartm, k2+1 ), 1,a( istartm, k2 ), & + 1, c1, s1 ) + call stdlib_srot( k2-istartm+1, b( istartm, k2+1 ), 1,b( istartm, k2 ), 1, & + c1, s1 ) + call stdlib_srot( jw, zc( 1, k2+1-kwtop+1 ), 1, zc( 1,k2-kwtop+1 ), 1, c1, & + s1 ) + call stdlib_slartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) + a( k2+1, k2 ) = temp + a( k2+2, k2 ) = zero + call stdlib_srot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& + s1 ) + call stdlib_srot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& + s1 ) + call stdlib_srot( jw, qc( 1, k2+1-kwtop+1 ), 1, qc( 1,k2+2-kwtop+1 ), 1, & + c1, s1 ) + end do + ! remove the shift + call stdlib_slartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) + + b( kwbot, kwbot ) = temp + b( kwbot, kwbot-1 ) = zero + call stdlib_srot( kwbot-istartm, b( istartm, kwbot ), 1,b( istartm, kwbot-1 ),& + 1, c1, s1 ) + call stdlib_srot( kwbot-istartm+1, a( istartm, kwbot ), 1,a( istartm, kwbot-1 & + ), 1, c1, s1 ) + call stdlib_srot( jw, zc( 1, kwbot-kwtop+1 ), 1, zc( 1,kwbot-1-kwtop+1 ), 1, & + c1, s1 ) + k = k-1 + end if + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & + lda, zero, work, jw ) + call stdlib_slacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, zero, work, jw ) + call stdlib_slacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_sgemm( 'N', 'N', n, jw, jw, one, q( 1, kwtop ), ldq, qc,ldqc, zero, & + work, n ) + call stdlib_slacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & + ), lda ) + call stdlib_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & + zc, ldzc, zero, work,kwtop-istartm ) + call stdlib_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & + ), ldb ) + end if + if ( ilz ) then + call stdlib_sgemm( 'N', 'N', n, jw, jw, one, z( 1, kwtop ), ldz, zc,ldzc, zero, & + work, n ) + call stdlib_slacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_slaqz3 + + !> To find the desired eigenvalues of a given real symmetric + !> tridiagonal matrix T, SLARRE: sets any "small" off-diagonal + !> elements to zero, and for each unreduced block T_i, it finds + !> (a) a suitable shift at one end of the block's spectrum, + !> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and + !> (c) eigenvalues of each L_i D_i L_i^T. + !> The representations and eigenvalues found are then used by + !> SSTEMR to compute the eigenvectors of T. + !> The accuracy varies depending on whether bisection is used to + !> find a few eigenvalues or the dqds algorithm (subroutine SLASQ2) to + !> conpute all and then discard any unwanted one. + !> As an added benefit, SLARRE also outputs the n + !> Gerschgorin intervals for the matrices L_i D_i L_i^T. + + pure subroutine stdlib_slarre( range, n, vl, vu, il, iu, d, e, e2,rtol1, rtol2, spltol, & + nsplit, isplit, m,w, werr, wgap, iblock, indexw, gers, pivmin,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: range + integer(ilp), intent(in) :: il, iu, n + integer(ilp), intent(out) :: info, m, nsplit + real(sp), intent(out) :: pivmin + real(sp), intent(in) :: rtol1, rtol2, spltol + real(sp), intent(inout) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: iblock(*), isplit(*), iwork(*), indexw(*) + real(sp), intent(inout) :: d(*), e(*), e2(*) + real(sp), intent(out) :: gers(*), w(*), werr(*), wgap(*), work(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: hndrd = 100.0_sp + real(sp), parameter :: pert = 4.0_sp + real(sp), parameter :: fourth = one/four + real(sp), parameter :: fac = half + real(sp), parameter :: maxgrowth = 64.0_sp + real(sp), parameter :: fudge = 2.0_sp + integer(ilp), parameter :: maxtry = 6 + integer(ilp), parameter :: allrng = 1 + integer(ilp), parameter :: indrng = 2 + integer(ilp), parameter :: valrng = 3 + + + ! Local Scalars + logical(lk) :: forceb, norep, usedqd + integer(ilp) :: cnt, cnt1, cnt2, i, ibegin, idum, iend, iinfo, in, indl, indu, irange, & + j, jblk, mb, mm, wbegin, wend + real(sp) :: avgap, bsrtol, clwdth, dmax, dpivot, eabs, emax, eold, eps, gl, gu, isleft,& + isrght, rtl, rtol, s1, s2, safmin, sgndef, sigma, spdiam, tau, tmp, tmp1 + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=0 ) then + return + end if + ! decode range + if( stdlib_lsame( range, 'A' ) ) then + irange = allrng + else if( stdlib_lsame( range, 'V' ) ) then + irange = valrng + else if( stdlib_lsame( range, 'I' ) ) then + irange = indrng + end if + m = 0 + ! get machine constants + safmin = stdlib_slamch( 'S' ) + eps = stdlib_slamch( 'P' ) + ! set parameters + rtl = hndrd*eps + ! if one were ever to ask for less initial precision in bsrtol, + ! one should keep in mind that for the subset case, the extremal + ! eigenvalues must be at least as accurate as the current setting + ! (eigenvalues in the middle need not as much accuracy) + bsrtol = sqrt(eps)*(0.5e-3_sp) + ! treat case of 1x1 matrix for quick return + if( n==1 ) then + if( (irange==allrng).or.((irange==valrng).and.(d(1)>vl).and.(d(1)<=vu)).or.((& + irange==indrng).and.(il==1).and.(iu==1)) ) then + m = 1 + w(1) = d(1) + ! the computation error of the eigenvalue is zero + werr(1) = zero + wgap(1) = zero + iblock( 1 ) = 1 + indexw( 1 ) = 1 + gers(1) = d( 1 ) + gers(2) = d( 1 ) + endif + ! store the shift for the initial rrr, which is zero in this case + e(1) = zero + return + end if + ! general case: tridiagonal matrix of order > 1 + ! init werr, wgap. compute gerschgorin intervals and spectral diameter. + ! compute maximum off-diagonal entry and pivmin. + gl = d(1) + gu = d(1) + eold = zero + emax = zero + e(n) = zero + do i = 1,n + werr(i) = zero + wgap(i) = zero + eabs = abs( e(i) ) + if( eabs >= emax ) then + emax = eabs + end if + tmp1 = eabs + eold + gers( 2*i-1) = d(i) - tmp1 + gl = min( gl, gers( 2*i - 1)) + gers( 2*i ) = d(i) + tmp1 + gu = max( gu, gers(2*i) ) + eold = eabs + end do + ! the minimum pivot allowed in the sturm sequence for t + pivmin = safmin * max( one, emax**2 ) + ! compute spectral diameter. the gerschgorin bounds give an + ! estimate that is wrong by at most a factor of sqrt(2) + spdiam = gu - gl + ! compute splitting points + call stdlib_slarra( n, d, e, e2, spltol, spdiam,nsplit, isplit, iinfo ) + ! can force use of bisection instead of faster dqds. + ! option left in the code for future multisection work. + forceb = .false. + ! initialize usedqd, dqds should be used for allrng unless someone + ! explicitly wants bisection. + usedqd = (( irange==allrng ) .and. (.not.forceb)) + if( (irange==allrng) .and. (.not. forceb) ) then + ! set interval [vl,vu] that contains all eigenvalues + vl = gl + vu = gu + else + ! we call stdlib_slarrd to find crude approximations to the eigenvalues + ! in the desired range. in case irange = indrng, we also obtain the + ! interval (vl,vu] that contains all the wanted eigenvalues. + ! an interval [left,right] has converged if + ! right-leftvl ).and.( d( & + ibegin )<=vu ) ).or. ( (irange==indrng).and.(iblock(wbegin)==jblk))) then + m = m + 1 + w( m ) = d( ibegin ) + werr(m) = zero + ! the gap for a single block doesn't matter for the later + ! algorithm and is assigned an arbitrary large value + wgap(m) = zero + iblock( m ) = jblk + indexw( m ) = 1 + wbegin = wbegin + 1 + endif + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + end if + ! blocks of size larger than 1x1 + ! e( iend ) will hold the shift for the initial rrr, for now set it =0 + e( iend ) = zero + ! find local outer bounds gl,gu for the block + gl = d(ibegin) + gu = d(ibegin) + do i = ibegin , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + if(.not. ((irange==allrng).and.(.not.forceb)) ) then + ! count the number of eigenvalues in the current block. + mb = 0 + do i = wbegin,mm + if( iblock(i)==jblk ) then + mb = mb+1 + else + goto 21 + endif + end do + 21 continue + if( mb==0) then + ! no eigenvalue in the current block lies in the desired range + ! e( iend ) holds the shift for the initial rrr + e( iend ) = zero + ibegin = iend + 1 + cycle loop_170 + else + ! decide whether dqds or bisection is more efficient + usedqd = ( (mb > fac*in) .and. (.not.forceb) ) + wend = wbegin + mb - 1 + ! calculate gaps for the current block + ! in later stages, when representations for individual + ! eigenvalues are different, we use sigma = e( iend ). + sigma = zero + do i = wbegin, wend - 1 + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( wend ) = max( zero,vu - sigma - (w( wend )+werr( wend ))) + ! find local index of the first and last desired evalue. + indl = indexw(wbegin) + indu = indexw( wend ) + endif + endif + if(( (irange==allrng) .and. (.not. forceb) ).or.usedqd) then + ! case of dqds + ! find approximations to the extremal eigenvalues of the block + call stdlib_slarrk( in, 1, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1, & + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isleft = max(gl, tmp - tmp1- hndrd * eps* abs(tmp - tmp1)) + call stdlib_slarrk( in, in, gl, gu, d(ibegin),e2(ibegin), pivmin, rtl, tmp, tmp1,& + iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + isrght = min(gu, tmp + tmp1+ hndrd * eps * abs(tmp + tmp1)) + ! improve the estimate of the spectral diameter + spdiam = isrght - isleft + else + ! case of bisection + ! find approximations to the wanted extremal eigenvalues + isleft = max(gl, w(wbegin) - werr(wbegin)- hndrd * eps*abs(w(wbegin)- werr(& + wbegin) )) + isrght = min(gu,w(wend) + werr(wend)+ hndrd * eps * abs(w(wend)+ werr(wend))) + + endif + ! decide whether the base representation for the current block + ! l_jblk d_jblk l_jblk^t = t_jblk - sigma_jblk i + ! should be on the left or the right end of the current block. + ! the strategy is to shift to the end which is "more populated" + ! furthermore, decide whether to use dqds for the computation of + ! dqds is chosen if all eigenvalues are desired or the number of + ! eigenvalues to be computed is large compared to the blocksize. + if( ( irange==allrng ) .and. (.not.forceb) ) then + ! if all the eigenvalues have to be computed, we use dqd + usedqd = .true. + ! indl is the local index of the first eigenvalue to compute + indl = 1 + indu = in + ! mb = number of eigenvalues to compute + mb = in + wend = wbegin + mb - 1 + ! define 1/4 and 3/4 points of the spectrum + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + ! stdlib_slarrd has computed iblock and indexw for each eigenvalue + ! approximation. + ! choose sigma + if( usedqd ) then + s1 = isleft + fourth * spdiam + s2 = isrght - fourth * spdiam + else + tmp = min(isrght,vu) - max(isleft,vl) + s1 = max(isleft,vl) + fourth * tmp + s2 = min(isrght,vu) - fourth * tmp + endif + endif + ! compute the negcount at the 1/4 and 3/4 points + if(mb>1) then + call stdlib_slarrc( 'T', in, s1, s2, d(ibegin),e(ibegin), pivmin, cnt, cnt1, & + cnt2, iinfo) + endif + if(mb==1) then + sigma = gl + sgndef = one + elseif( cnt1 - indl >= indu - cnt2 ) then + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = max(isleft,gl) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get pos def matrix + ! for dqds + sigma = isleft + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = max(isleft,vl) + endif + sgndef = one + else + if( ( irange==allrng ) .and. (.not.forceb) ) then + sigma = min(isrght,gu) + elseif( usedqd ) then + ! use gerschgorin bound as shift to get neg def matrix + ! for dqds + sigma = isrght + else + ! use approximation of the first desired eigenvalue of the + ! block as shift + sigma = min(isrght,vu) + endif + sgndef = -one + endif + ! an initial sigma has been chosen that will be used for computing + ! t - sigma i = l d l^t + ! define the increment tau of the shift in case the initial shift + ! needs to be refined to obtain a factorization with not too much + ! element growth. + if( usedqd ) then + ! the initial sigma was to the outer end of the spectrum + ! the matrix is definite and we need not retreat. + tau = spdiam*eps*n + two*pivmin + tau = max( tau,two*eps*abs(sigma) ) + else + if(mb>1) then + clwdth = w(wend) + werr(wend) - w(wbegin) - werr(wbegin) + avgap = abs(clwdth / real(wend-wbegin,KIND=sp)) + if( sgndef==one ) then + tau = half*max(wgap(wbegin),avgap) + tau = max(tau,werr(wbegin)) + else + tau = half*max(wgap(wend-1),avgap) + tau = max(tau,werr(wend)) + endif + else + tau = werr(wbegin) + endif + endif + loop_80: do idum = 1, maxtry + ! compute l d l^t factorization of tridiagonal matrix t - sigma i. + ! store d in work(1:in), l in work(in+1:2*in), and reciprocals of + ! pivots in work(2*in+1:3*in) + dpivot = d( ibegin ) - sigma + work( 1 ) = dpivot + dmax = abs( work(1) ) + j = ibegin + do i = 1, in - 1 + work( 2*in+i ) = one / work( i ) + tmp = e( j )*work( 2*in+i ) + work( in+i ) = tmp + dpivot = ( d( j+1 )-sigma ) - tmp*e( j ) + work( i+1 ) = dpivot + dmax = max( dmax, abs(dpivot) ) + j = j + 1 + end do + ! check for element growth + if( dmax > maxgrowth*spdiam ) then + norep = .true. + else + norep = .false. + endif + if( usedqd .and. .not.norep ) then + ! ensure the definiteness of the representation + ! all entries of d (of l d l^t) must have the same sign + do i = 1, in + tmp = sgndef*work( i ) + if( tmp1 ) then + ! perturb each entry of the base representation by a small + ! (but random) relative amount to overcome difficulties with + ! glued matrices. + do i = 1, 4 + iseed( i ) = 1 + end do + call stdlib_slarnv(2, iseed, 2*in-1, work(1)) + do i = 1,in-1 + d(ibegin+i-1) = d(ibegin+i-1)*(one+eps*pert*work(i)) + e(ibegin+i-1) = e(ibegin+i-1)*(one+eps*pert*work(in+i)) + end do + d(iend) = d(iend)*(one+eps*four*work(in)) + endif + ! don't update the gerschgorin intervals because keeping track + ! of the updates would be too much work in stdlib_slarrv. + ! we update w instead and use it to locate the proper gerschgorin + ! intervals. + ! compute the required eigenvalues of l d l' by bisection or dqds + if ( .not.usedqd ) then + ! if stdlib_slarrd has been used, shift the eigenvalue approximations + ! according to their representation. this is necessary for + ! a uniform stdlib_slarrv since dqds computes eigenvalues of the + ! shifted representation. in stdlib_slarrv, w will always hold the + ! unshifted eigenvalue approximation. + do j=wbegin,wend + w(j) = w(j) - sigma + werr(j) = werr(j) + abs(w(j)) * eps + end do + ! call stdlib_slarrb to reduce eigenvalue error of the approximations + ! from stdlib_slarrd + do i = ibegin, iend-1 + work( i ) = d( i ) * e( i )**2 + end do + ! use bisection to find ev from indl to indu + call stdlib_slarrb(in, d(ibegin), work(ibegin),indl, indu, rtol1, rtol2, indl-1,& + w(wbegin), wgap(wbegin), werr(wbegin),work( 2*n+1 ), iwork, pivmin, spdiam,in, & + iinfo ) + if( iinfo /= 0 ) then + info = -4 + return + end if + ! stdlib_slarrb computes all gaps correctly except for the last one + ! record distance to vu/gu + wgap( wend ) = max( zero,( vu-sigma ) - ( w( wend ) + werr( wend ) ) ) + do i = indl, indu + m = m + 1 + iblock(m) = jblk + indexw(m) = i + end do + else + ! call dqds to get all eigs (and then possibly delete unwanted + ! eigenvalues). + ! note that dqds finds the eigenvalues of the l d l^t representation + ! of t to high relative accuracy. high relative accuracy + ! might be lost when the shift of the rrr is subtracted to obtain + ! the eigenvalues of t. however, t is not guaranteed to define its + ! eigenvalues to high relative accuracy anyway. + ! set rtol to the order of the tolerance used in stdlib_slasq2 + ! this is an estimated error, the worst case bound is 4*n*eps + ! which is usually too large and requires unnecessary work to be + ! done by bisection when computing the eigenvectors + rtol = log(real(in,KIND=sp)) * four * eps + j = ibegin + do i = 1, in - 1 + work( 2*i-1 ) = abs( d( j ) ) + work( 2*i ) = e( j )*e( j )*work( 2*i-1 ) + j = j + 1 + end do + work( 2*in-1 ) = abs( d( iend ) ) + work( 2*in ) = zero + call stdlib_slasq2( in, work, iinfo ) + if( iinfo /= 0 ) then + ! if iinfo = -5 then an index is part of a tight cluster + ! and should be changed. the index is in iwork(1) and the + ! gap is in work(n+1) + info = -5 + return + else + ! test that all eigenvalues are positive as expected + do i = 1, in + if( work( i )zero ) then + do i = indl, indu + m = m + 1 + w( m ) = work( in-i+1 ) + iblock( m ) = jblk + indexw( m ) = i + end do + else + do i = indl, indu + m = m + 1 + w( m ) = -work( i ) + iblock( m ) = jblk + indexw( m ) = i + end do + end if + do i = m - mb + 1, m + ! the value of rtol below should be the tolerance in stdlib_slasq2 + werr( i ) = rtol * abs( w(i) ) + end do + do i = m - mb + 1, m - 1 + ! compute the right gap between the intervals + wgap( i ) = max( zero,w(i+1)-werr(i+1) - (w(i)+werr(i)) ) + end do + wgap( m ) = max( zero,( vu-sigma ) - ( w( m ) + werr( m ) ) ) + end if + ! proceed with next block + ibegin = iend + 1 + wbegin = wend + 1 + end do loop_170 + return + end subroutine stdlib_slarre + + !> Using a divide and conquer approach, SLASD0: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M + !> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + !> The algorithm computes orthogonal matrices U and VT such that + !> B = U * S * VT. The singular values S are overwritten on D. + !> A related subroutine, SLASDA, computes only the singular values, + !> and optionally, the singular vectors in compact form. + + pure subroutine stdlib_slasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu, ldvt, n, smlsiz, sqre + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei + real(sp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + end if + m = n + sqre + if( ldu Using a divide and conquer approach, SLASDA: computes the singular + !> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + !> B with diagonal D and offdiagonal E, where M = N + SQRE. The + !> algorithm computes the singular values in the SVD B = U * S * VT. + !> The orthogonal matrices U and VT are optionally computed in + !> compact form. + !> A related subroutine, SLASD0, computes the singular values and + !> the singular vectors in explicit form. + + pure subroutine stdlib_slasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & + poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& + *) + real(sp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & + s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) + real(sp), intent(inout) :: d(*), e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & + m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & + nwork2, smlszp, sqrei, vf, vfi, vl, vli + real(sp) :: alpha, beta + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( ldu<( n+sqre ) ) then + info = -8 + else if( ldgcol SLASDQ: computes the singular value decomposition (SVD) of a real + !> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + !> E, accumulating the transformations if desired. Letting B denote + !> the input bidiagonal matrix, the algorithm computes orthogonal + !> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose + !> of P). The singular values S are overwritten on D. + !> The input matrix U is changed to U * Q if desired. + !> The input matrix VT is changed to P**T * VT if desired. + !> The input matrix C is changed to Q**T * C if desired. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3, for a detailed description of the algorithm. + + pure subroutine stdlib_slasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & + work, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre + ! Array Arguments + real(sp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: rotate + integer(ilp) :: i, isub, iuplo, j, np1, sqre1 + real(sp) :: cs, r, smin, sn + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + iuplo = 0 + if( stdlib_lsame( uplo, 'U' ) )iuplo = 1 + if( stdlib_lsame( uplo, 'L' ) )iuplo = 2 + if( iuplo==0 ) then + info = -1 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncvt<0 ) then + info = -4 + else if( nru<0 ) then + info = -5 + else if( ncc<0 ) then + info = -6 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + np1 = n + 1 + sqre1 = sqre + ! if matrix non-square upper bidiagonal, rotate to be lower + ! bidiagonal. the rotations are on the right. + if( ( iuplo==1 ) .and. ( sqre1==1 ) ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + call stdlib_slartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + e( n ) = zero + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + iuplo = 2 + sqre1 = 0 + ! update singular vectors if desired. + if( ncvt>0 )call stdlib_slasr( 'L', 'V', 'F', np1, ncvt, work( 1 ),work( np1 ), vt, & + ldvt ) + end if + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left. + if( iuplo==2 ) then + do i = 1, n - 1 + call stdlib_slartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( rotate ) then + work( i ) = cs + work( n+i ) = sn + end if + end do + ! if matrix (n+1)-by-n lower bidiagonal, one additional + ! rotation is needed. + if( sqre1==1 ) then + call stdlib_slartg( d( n ), e( n ), cs, sn, r ) + d( n ) = r + if( rotate ) then + work( n ) = cs + work( n+n ) = sn + end if + end if + ! update singular vectors if desired. + if( nru>0 ) then + if( sqre1==0 ) then + call stdlib_slasr( 'R', 'V', 'F', nru, n, work( 1 ),work( np1 ), u, ldu ) + + else + call stdlib_slasr( 'R', 'V', 'F', nru, np1, work( 1 ),work( np1 ), u, ldu ) + + end if + end if + if( ncc>0 ) then + if( sqre1==0 ) then + call stdlib_slasr( 'L', 'V', 'F', n, ncc, work( 1 ),work( np1 ), c, ldc ) + + else + call stdlib_slasr( 'L', 'V', 'F', np1, ncc, work( 1 ),work( np1 ), c, ldc ) + + end if + end if + end if + ! call stdlib_sbdsqr to compute the svd of the reduced real + ! n-by-n upper bidiagonal matrix. + call stdlib_sbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) + + ! sort the singular values into ascending order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n + ! scan for smallest d(i). + isub = i + smin = d( i ) + do j = i + 1, n + if( d( j )0 )call stdlib_sswap( ncvt, vt( isub, 1 ), ldvt, vt( i, 1 ), ldvt ) + + if( nru>0 )call stdlib_sswap( nru, u( 1, isub ), 1, u( 1, i ), 1 ) + if( ncc>0 )call stdlib_sswap( ncc, c( isub, 1 ), ldc, c( i, 1 ), ldc ) + end if + end do + return + end subroutine stdlib_slasdq + + !> SLASQ1: computes the singular values of a real N-by-N bidiagonal + !> matrix with diagonal D and off-diagonal E. The singular values + !> are computed to high relative accuracy, in the absence of + !> denormalization, underflow and overflow. The algorithm was first + !> presented in + !> "Accurate singular values and differential qd algorithms" by K. V. + !> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + !> 1994, + !> and the present implementation is described in "An implementation of + !> the dqds Algorithm (Positive Case)", LAPACK Working Note. + + pure subroutine stdlib_slasq1( n, d, e, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo + real(sp) :: eps, scale, safmin, sigmn, sigmx + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'SLASQ1', -info ) + return + else if( n==0 ) then + return + else if( n==1 ) then + d( 1 ) = abs( d( 1 ) ) + return + else if( n==2 ) then + call stdlib_slas2( d( 1 ), e( 1 ), d( 2 ), sigmn, sigmx ) + d( 1 ) = sigmx + d( 2 ) = sigmn + return + end if + ! estimate the largest singular value. + sigmx = zero + do i = 1, n - 1 + d( i ) = abs( d( i ) ) + sigmx = max( sigmx, abs( e( i ) ) ) + end do + d( n ) = abs( d( n ) ) + ! early return if sigmx is zero (matrix is already diagonal). + if( sigmx==zero ) then + call stdlib_slasrt( 'D', n, d, iinfo ) + return + end if + do i = 1, n + sigmx = max( sigmx, d( i ) ) + end do + ! copy d and e into work (in the z format) and scale (squaring the + ! input data makes scaling by a power of the radix pointless). + eps = stdlib_slamch( 'PRECISION' ) + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + scale = sqrt( eps / safmin ) + call stdlib_scopy( n, d, 1, work( 1 ), 2 ) + call stdlib_scopy( n-1, e, 1, work( 2 ), 2 ) + call stdlib_slascl( 'G', 0, 0, sigmx, scale, 2*n-1, 1, work, 2*n-1,iinfo ) + ! compute the q's and e's. + do i = 1, 2*n - 1 + work( i ) = work( i )**2 + end do + work( 2*n ) = zero + call stdlib_slasq2( n, work, info ) + if( info==0 ) then + do i = 1, n + d( i ) = sqrt( work( i ) ) + end do + call stdlib_slascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + else if( info==2 ) then + ! maximum number of iterations exceeded. move data from work + ! into d and e so the calling subroutine can try to finish + do i = 1, n + d( i ) = sqrt( work( 2*i-1 ) ) + e( i ) = sqrt( work( 2*i ) ) + end do + call stdlib_slascl( 'G', 0, 0, scale, sigmx, n, 1, d, n, iinfo ) + call stdlib_slascl( 'G', 0, 0, scale, sigmx, n, 1, e, n, iinfo ) + end if + return + end subroutine stdlib_slasq1 + + !> SLASQ2: computes all the eigenvalues of the symmetric positive + !> definite tridiagonal matrix associated with the qd array Z to high + !> relative accuracy are computed to high relative accuracy, in the + !> absence of denormalization, underflow and overflow. + !> To see the relation of Z to the tridiagonal matrix, let L be a + !> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + !> let U be an upper bidiagonal matrix with 1's above and diagonal + !> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + !> symmetric tridiagonal to which it is similar. + !> Note : SLASQ2 defines a logical variable, IEEE, which is true + !> on machines which follow ieee-754 floating-point standard in their + !> handling of infinities and NaNs, and false otherwise. This variable + !> is passed to SLASQ3. + + pure subroutine stdlib_slasq2( n, z, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(sp), intent(inout) :: z(*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: cbias = 1.50_sp + real(sp), parameter :: hundrd = 100.0_sp + + + ! Local Scalars + logical(lk) :: ieee + integer(ilp) :: i0, i4, iinfo, ipn4, iter, iwhila, iwhilb, k, kmin, n0, nbig, ndiv, & + nfail, pp, splt, ttype, i1, n1 + real(sp) :: d, dee, deemin, desig, dmin, dmin1, dmin2, dn, dn1, dn2, e, emax, emin, & + eps, g, oldemn, qmax, qmin, s, safmin, sigma, t, tau, temp, tol, tol2, trace, zmax, & + tempe, tempq + ! Intrinsic Functions + intrinsic :: abs,max,min,real,sqrt + ! Executable Statements + ! test the input arguments. + ! (in case stdlib_slasq2 is not called by stdlib_slasq1) + info = 0 + eps = stdlib_slamch( 'PRECISION' ) + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + tol = eps*hundrd + tol2 = tol**2 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'SLASQ2', 1 ) + return + else if( n==0 ) then + return + else if( n==1 ) then + ! 1-by-1 case. + if( z( 1 )z( 1 ) ) then + d = z( 3 ) + z( 3 ) = z( 1 ) + z( 1 ) = d + end if + z( 5 ) = z( 1 ) + z( 2 ) + z( 3 ) + if( z( 2 )>z( 3 )*tol2 ) then + t = half*( ( z( 1 )-z( 3 ) )+z( 2 ) ) + s = z( 3 )*( z( 2 ) / t ) + if( s<=t ) then + s = z( 3 )*( z( 2 ) / ( t*( one+sqrt( one+s / t ) ) ) ) + else + s = z( 3 )*( z( 2 ) / ( t+sqrt( t )*sqrt( t+s ) ) ) + end if + t = z( 1 ) + ( s+z( 2 ) ) + z( 3 ) = z( 3 )*( z( 1 ) / t ) + z( 1 ) = t + end if + z( 2 ) = z( 3 ) + z( 6 ) = z( 2 ) + z( 1 ) + return + end if + ! check for negative data and compute sums of q's and e's. + z( 2*n ) = zero + emin = z( 2 ) + qmax = zero + zmax = zero + d = zero + e = zero + do k = 1, 2*( n-1 ), 2 + if( z( k )i0 ) then + emin = abs( z( 4*n0-5 ) ) + else + emin = zero + end if + qmin = z( 4*n0-3 ) + qmax = qmin + do i4 = 4*n0, 8, -4 + if( z( i4-5 )<=zero )go to 100 + if( qmin>=four*emax ) then + qmin = min( qmin, z( i4-3 ) ) + emax = max( emax, z( i4-5 ) ) + end if + qmax = max( qmax, z( i4-7 )+z( i4-5 ) ) + emin = min( emin, z( i4-5 ) ) + end do + i4 = 4 + 100 continue + i0 = i4 / 4 + pp = 0 + if( n0-i0>1 ) then + dee = z( 4*i0-3 ) + deemin = dee + kmin = i0 + do i4 = 4*i0+1, 4*n0-3, 4 + dee = z( i4 )*( dee /( dee+z( i4-2 ) ) ) + if( dee<=deemin ) then + deemin = dee + kmin = ( i4+3 )/4 + end if + end do + if( (kmin-i0)*2n0 )go to 150 + ! while submatrix unfinished take a good dqds step. + call stdlib_slasq3( i0, n0, z, pp, dmin, sigma, desig, qmax, nfail,iter, ndiv, & + ieee, ttype, dmin1, dmin2, dn, dn1,dn2, g, tau ) + pp = 1 - pp + ! when emin is very small check for splits. + if( pp==0 .and. n0-i0>=3 ) then + if( z( 4*n0 )<=tol2*qmax .or.z( 4*n0-1 )<=tol2*sigma ) then + splt = i0 - 1 + qmax = z( 4*i0-3 ) + emin = z( 4*i0-1 ) + oldemn = z( 4*i0 ) + do i4 = 4*i0, 4*( n0-3 ), 4 + if( z( i4 )<=tol2*z( i4-3 ) .or.z( i4-1 )<=tol2*sigma ) then + z( i4-1 ) = -sigma + splt = i4 / 4 + qmax = zero + emin = z( i4+3 ) + oldemn = z( i4+4 ) + else + qmax = max( qmax, z( i4+1 ) ) + emin = min( emin, z( i4-1 ) ) + oldemn = min( oldemn, z( i4 ) ) + end if + end do + z( 4*n0-1 ) = emin + z( 4*n0 ) = oldemn + i0 = splt + 1 + end if + end if + end do loop_140 + info = 2 + ! maximum number of iterations exceeded, restore the shift + ! sigma and place the new d's and e's in a qd array. + ! this might need to be done for several blocks + i1 = i0 + n1 = n0 + 145 continue + tempq = z( 4*i0-3 ) + z( 4*i0-3 ) = z( 4*i0-3 ) + sigma + do k = i0+1, n0 + tempe = z( 4*k-5 ) + z( 4*k-5 ) = z( 4*k-5 ) * (tempq / z( 4*k-7 )) + tempq = z( 4*k-3 ) + z( 4*k-3 ) = z( 4*k-3 ) + sigma + tempe - z( 4*k-5 ) + end do + ! prepare to do this on the previous block if there is one + if( i1>1 ) then + n1 = i1-1 + do while( ( i1>=2 ) .and. ( z(4*i1-5)>=zero ) ) + i1 = i1 - 1 + end do + if( i1>=1 ) then + sigma = -z(4*n1-1) + go to 145 + end if + end if + do k = 1, n + z( 2*k-1 ) = z( 4*k-3 ) + ! only the block 1..n0 is unfinished. the rest of the e's + ! must be essentially zero, although sometimes other data + ! has been stored in them. + if( k DLATRF_AA factorizes a panel of a real symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_slasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), h(ldh,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + real(sp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_ssytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j, j:m) - h(j:m, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:m, j) has been initialized to be a(j, j:m) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_sgemv( 'NO TRANSPOSE', mj, j-k1,-one, h( j, k1 ), ldh,a( 1, j ), 1,& + one, h( j, j ), 1 ) + end if + ! copy h(i:m, i) into work + call stdlib_scopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:m) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:m) stores u(j-1, j:m) + alpha = -a( k-1, j ) + call stdlib_saxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = work( 1 ) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_saxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_isamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:m) with a(i1+1:m, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_sswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + ! swap a(i1, i2+1:m) with a(i2, i2+1:m) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_sswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_ssytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j:m, j) - h(j:m, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:m, j) has been initialized to be a(j:m, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_sgemv( 'NO TRANSPOSE', mj, j-k1,-one, h( j, k1 ), ldh,a( j, 1 ), lda,& + one, h( j, j ), 1 ) + end if + ! copy h(j:m, j) into work + call stdlib_scopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:m, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -a( j, k-1 ) + call stdlib_saxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = work( 1 ) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_saxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_isamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:m, i1) with a(i2, i1+1:m) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_sswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + ! swap a(i2+1:m, i1) with a(i2+1:m, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_sswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j SPTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using SPTTRF, and then calling SBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band symmetric positive definite matrix + !> can also be found if SSYTRD, SSPTRD, or SSBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to tridiagonal + !> form, however, may preclude the possibility of obtaining high + !> relative accuracy in the small eigenvalues of the original matrix, if + !> these eigenvalues range over many orders of magnitude.) + + pure subroutine stdlib_spteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(sp), intent(inout) :: d(*), e(*), z(ldz,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Arrays + real(sp) :: c(1,1), vt(1,1) + ! Local Scalars + integer(ilp) :: i, icompz, nru + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldz0 )z( 1, 1 ) = one + return + end if + if( icompz==2 )call stdlib_slaset( 'FULL', n, n, zero, one, z, ldz ) + ! call stdlib_spttrf to factor the matrix. + call stdlib_spttrf( n, d, e, info ) + if( info/=0 )return + do i = 1, n + d( i ) = sqrt( d( i ) ) + end do + do i = 1, n - 1 + e( i ) = e( i )*d( i ) + end do + ! call stdlib_sbdsqr to compute the singular values/vectors of the + ! bidiagonal factor. + if( icompz>0 ) then + nru = n + else + nru = 0 + end if + call stdlib_sbdsqr( 'LOWER', n, 0, nru, 0, d, e, vt, 1, z, ldz, c, 1,work, info ) + + ! square the singular values. + if( info==0 ) then + do i = 1, n + d( i ) = d( i )*d( i ) + end do + else + info = n + info + end if + return + end subroutine stdlib_spteqr + + !> SSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> SSTEGR is a compatibility wrapper around the improved SSTEMR routine. + !> See SSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : SSTEGR and SSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_sstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*) + real(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_sstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_sstegr + + !> SSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.SSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + + pure subroutine stdlib_sstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*) + real(sp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(sp), parameter :: minrgp = 3.0e-3_sp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(sp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_sstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_slarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_slarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_slarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_slae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_slaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_slarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_slanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_sscal( n, scale, d, 1 ) + call stdlib_sscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_slarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_slarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_scopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_slarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_slarre computes the eigenvalues to less than full precision. + ! stdlib_slarrv will refine the eigenvalue approximations, and we can + ! need less accurate initial bisection in stdlib_slarre. + ! note: these settings do only affect the subset case and stdlib_slarre + rtol1 = max( sqrt(eps)*5.0e-2_sp, four * eps ) + rtol2 = max( sqrt(eps)*5.0e-3_sp, four * eps ) + endif + call stdlib_slarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_slarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_slarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_slarre computes eigenvalues of the (shifted) root representation + ! stdlib_slarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_slarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_slasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) SSTEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Eigenvalues and + !> eigenvectors can be selected by specifying either a range of values + !> or a range of indices for the desired eigenvalues. + !> Whenever possible, SSTEVR calls SSTEMR to compute the + !> eigenspectrum using Relatively Robust Representations. SSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. For the i-th + !> unreduced block of T, + !> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T + !> is a relatively robust representation, + !> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high + !> relative accuracy by the dqds algorithm, + !> (c) If there is a cluster of close eigenvalues, "choose" sigma_i + !> close to the cluster, and go to step (a), + !> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, + !> compute the corresponding eigenvector by forming a + !> rank-revealing twisted factorization. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, + !> Computer Science Division Technical Report No. UCB//CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : SSTEVR calls SSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> SSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of SSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + pure subroutine stdlib_sstevr( jobz, range, n, d, e, vl, vu, il, iu, abstol,m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: d(*), e(*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, test, lquery, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, imax, indibl, indifl, indisp, indiwo, iscale, j, jj, liwmin,& + lwmin, nsplit + real(sp) :: bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, tnrm, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'SSTEVR', 'N', 1, 2, 3, 4 ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 20*n ) + liwmin = max(1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz )z( 1, 1 ) = one + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + if( valeig ) then + vll = vl + vuu = vu + end if + tnrm = stdlib_slanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + iscale = 1 + sigma = rmax / tnrm + end if + if( iscale==1 ) then + call stdlib_sscal( n, sigma, d, 1 ) + call stdlib_sscal( n-1, sigma, e( 1 ), 1 ) + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: these indices are used only + ! if stdlib_ssterf or stdlib_sstemr fail. + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_sstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_sstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_sstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indisp + n + ! if all eigenvalues are desired, then + ! call stdlib_ssterf or stdlib_sstemr. if this fails for some eigenvalue, then + ! try stdlib_sstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ieeeok==1 ) then + call stdlib_scopy( n-1, e( 1 ), 1, work( 1 ), 1 ) + if( .not.wantz ) then + call stdlib_scopy( n, d, 1, w, 1 ) + call stdlib_ssterf( n, w, work, info ) + else + call stdlib_scopy( n, d, 1, work( n+1 ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_sstemr( jobz, 'A', n, work( n+1 ), work, vl, vu, il,iu, m, w, z, ldz,& + n, isuppz, tryrac,work( 2*n+1 ), lwork-2*n, iwork, liwork, info ) + end if + if( info==0 ) then + m = n + go to 10 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_sstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstol, d, e, m,nsplit, w, & + iwork( indibl ), iwork( indisp ), work,iwork( indiwo ), info ) + if( wantz ) then + call stdlib_sstein( n, d, e, m, w, iwork( indibl ), iwork( indisp ),z, ldz, work, & + iwork( indiwo ), iwork( indifl ),info ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 10 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) SSYEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric matrix A. Eigenvalues and eigenvectors can be + !> selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> SSYEVR first reduces the matrix A to tridiagonal form T with a call + !> to SSYTRD. Then, whenever possible, SSYEVR calls SSTEMR to compute + !> the eigenspectrum using Relatively Robust Representations. SSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see SSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : SSYEVR calls SSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> SSYEVR calls SSTEBZ and SSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of SSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_ssyevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(sp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: w(*), work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indd, inddd, inde, indee, indibl, indifl, & + indisp, indiwo, indtau, indwk, indwkn, iscale, j, jj, liwmin, llwork, llwrkn, lwkopt, & + lwmin, nb, nsplit + real(sp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'SSYEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( liwork==-1 ) ) + lwmin = max( 1, 26*n ) + liwmin = max( 1, 10*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=a( 1, 1 ) ) then + m = 1 + w( 1 ) = a( 1, 1 ) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_slamch( 'SAFE MINIMUM' ) + eps = stdlib_slamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_slansy( 'M', uplo, n, a, lda, work ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_sscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_sscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_ssterf or stdlib_sstemr fail. + ! work(indtau:indtau+n-1) stores the scalar factors of the + ! elementary reflectors used in stdlib_ssytrd. + indtau = 1 + ! work(indd:indd+n-1) stores the tridiagonal's diagonal entries. + indd = indtau + n + ! work(inde:inde+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_ssytrd. + inde = indd + n + ! work(inddd:inddd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_sstemr (the stdlib_ssterf path copies the diagonal to w). + inddd = inde + n + ! work(indee:indee+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_ssterf and stdlib_sstemr. + indee = inddd + n + ! indwk is the starting offset of the left-over workspace, and + ! llwork is the remaining workspace size. + indwk = indee + n + llwork = lwork - indwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_sstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_sstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_sstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_ssytrd to reduce symmetric matrix to tridiagonal form. + call stdlib_ssytrd( uplo, n, a, lda, work( indd ), work( inde ),work( indtau ), work( & + indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_ssterf or stdlib_sstemr and stdlib_sormtr. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig.or.test ) .and. ( ieeeok==1 ) ) then + if( .not.wantz ) then + call stdlib_scopy( n, work( indd ), 1, w, 1 ) + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_ssterf( n, w, work( indee ), info ) + else + call stdlib_scopy( n-1, work( inde ), 1, work( indee ), 1 ) + call stdlib_scopy( n, work( indd ), 1, work( inddd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_sstemr( jobz, 'A', n, work( inddd ), work( indee ),vl, vu, il, iu, m,& + w, z, ldz, n, isuppz,tryrac, work( indwk ), lwork, iwork, liwork,info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_sstemr. + if( wantz .and. info==0 ) then + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_sormtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + ! everything worked. skip stdlib_sstebz/stdlib_sstein. iwork(:) are + ! undefined. + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_sstebz and, if eigenvectors are desired, stdlib_sstein. + ! also call stdlib_sstebz and stdlib_sstein if stdlib_sstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_sstebz( range, order, n, vll, vuu, il, iu, abstll,work( indd ), work( inde & + ), m, nsplit, w,iwork( indibl ), iwork( indisp ), work( indwk ),iwork( indiwo ), info ) + + if( wantz ) then + call stdlib_sstein( n, work( indd ), work( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,work( indwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply orthogonal matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_sstein. + indwkn = inde + llwrkn = lwork - indwkn + 1 + call stdlib_sormtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + ! jump here if stdlib_sstemr/stdlib_sstein succeeded. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_sscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. note: we do not sort the ifail portion of iwork. + ! it may not be initialized (if stdlib_sstemr/stdlib_sstein succeeded), and we do + ! not return this detailed information to the user. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) SSYSV computes the solution to a real system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_ssysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*), b(ldb,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda SSYTRF_AA: computes the factorization of a real symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_ssytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(sp), intent(inout) :: a(lda,*) + real(sp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + real(sp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'SSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_slasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_sswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = one + call stdlib_scopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_sgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_sgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j1-k2, j3 ), 1,one, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_sgemm + call stdlib_sgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-one, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,one, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_scopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_scopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_slasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_slasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_sswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = one + call stdlib_scopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_sgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_sgemv( 'NO TRANSPOSE', mj, jb+1,-one, work( j3-j1+1+k1*n ), & + n,a( j3, j1-k2 ), lda,one, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_sgemm + call stdlib_sgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-one, work(& + j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,one, a( j3, j2 ), lda ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_scopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_ssytrf_aa + + + +end module stdlib_linalg_lapack_s diff --git a/src/stdlib_linalg_lapack_w.fypp b/src/stdlib_linalg_lapack_w.fypp new file mode 100644 index 000000000..a685fd87f --- /dev/null +++ b/src/stdlib_linalg_lapack_w.fypp @@ -0,0 +1,81218 @@ +#:include "common.fypp" +#:if WITH_QP +module stdlib_linalg_lapack_w + use stdlib_linalg_constants + use stdlib_linalg_blas + use stdlib_linalg_lapack_aux + use stdlib_linalg_lapack_s + use stdlib_linalg_lapack_c + use stdlib_linalg_lapack_d + use stdlib_linalg_lapack_z + use stdlib_linalg_lapack_q + implicit none(type,external) + private + + + public :: sp,dp,qp,lk,ilp + public :: stdlib_wlag2w + public :: stdlib_wbbcsd + public :: stdlib_wbdsqr + public :: stdlib_wcgesv + public :: stdlib_wcposv + public :: stdlib_wdrscl + public :: stdlib_wgbbrd + public :: stdlib_wgbcon + public :: stdlib_wgbequ + public :: stdlib_wgbequb + public :: stdlib_wgbrfs + public :: stdlib_wgbsv + public :: stdlib_wgbsvx + public :: stdlib_wgbtf2 + public :: stdlib_wgbtrf + public :: stdlib_wgbtrs + public :: stdlib_wgebak + public :: stdlib_wgebal + public :: stdlib_wgebd2 + public :: stdlib_wgebrd + public :: stdlib_wgecon + public :: stdlib_wgeequ + public :: stdlib_wgeequb + public :: stdlib_wgees + public :: stdlib_wgeesx + public :: stdlib_wgeev + public :: stdlib_wgeevx + public :: stdlib_wgehd2 + public :: stdlib_wgehrd + public :: stdlib_wgejsv + public :: stdlib_wgelq + public :: stdlib_wgelq2 + public :: stdlib_wgelqf + public :: stdlib_wgelqt + public :: stdlib_wgelqt3 + public :: stdlib_wgels + public :: stdlib_wgelsd + public :: stdlib_wgelss + public :: stdlib_wgelsy + public :: stdlib_wgemlq + public :: stdlib_wgemlqt + public :: stdlib_wgemqr + public :: stdlib_wgemqrt + public :: stdlib_wgeql2 + public :: stdlib_wgeqlf + public :: stdlib_wgeqp3 + public :: stdlib_wgeqr + public :: stdlib_wgeqr2 + public :: stdlib_wgeqr2p + public :: stdlib_wgeqrf + public :: stdlib_wgeqrfp + public :: stdlib_wgeqrt + public :: stdlib_wgeqrt2 + public :: stdlib_wgeqrt3 + public :: stdlib_wgerfs + public :: stdlib_wgerq2 + public :: stdlib_wgerqf + public :: stdlib_wgesc2 + public :: stdlib_wgesdd + public :: stdlib_wgesv + public :: stdlib_wgesvd + public :: stdlib_wgesvdq + public :: stdlib_wgesvj + public :: stdlib_wgesvx + public :: stdlib_wgetc2 + public :: stdlib_wgetf2 + public :: stdlib_wgetrf + public :: stdlib_wgetrf2 + public :: stdlib_wgetri + public :: stdlib_wgetrs + public :: stdlib_wgetsls + public :: stdlib_wgetsqrhrt + public :: stdlib_wggbak + public :: stdlib_wggbal + public :: stdlib_wgges + public :: stdlib_wgges3 + public :: stdlib_wggesx + public :: stdlib_wggev + public :: stdlib_wggev3 + public :: stdlib_wggevx + public :: stdlib_wggglm + public :: stdlib_wgghd3 + public :: stdlib_wgghrd + public :: stdlib_wgglse + public :: stdlib_wggqrf + public :: stdlib_wggrqf + public :: stdlib_wgsvj0 + public :: stdlib_wgsvj1 + public :: stdlib_wgtcon + public :: stdlib_wgtrfs + public :: stdlib_wgtsv + public :: stdlib_wgtsvx + public :: stdlib_wgttrf + public :: stdlib_wgttrs + public :: stdlib_wgtts2 + public :: stdlib_whb2st_kernels + public :: stdlib_whbev + public :: stdlib_whbevd + public :: stdlib_whbevx + public :: stdlib_whbgst + public :: stdlib_whbgv + public :: stdlib_whbgvd + public :: stdlib_whbgvx + public :: stdlib_whbtrd + public :: stdlib_whecon + public :: stdlib_whecon_rook + public :: stdlib_wheequb + public :: stdlib_wheev + public :: stdlib_wheevd + public :: stdlib_wheevr + public :: stdlib_wheevx + public :: stdlib_whegs2 + public :: stdlib_whegst + public :: stdlib_whegv + public :: stdlib_whegvd + public :: stdlib_whegvx + public :: stdlib_wherfs + public :: stdlib_whesv + public :: stdlib_whesv_aa + public :: stdlib_whesv_rk + public :: stdlib_whesv_rook + public :: stdlib_whesvx + public :: stdlib_wheswapr + public :: stdlib_whetd2 + public :: stdlib_whetf2 + public :: stdlib_whetf2_rk + public :: stdlib_whetf2_rook + public :: stdlib_whetrd + public :: stdlib_whetrd_hb2st + public :: stdlib_whetrd_he2hb + public :: stdlib_whetrf + public :: stdlib_whetrf_aa + public :: stdlib_whetrf_rk + public :: stdlib_whetrf_rook + public :: stdlib_whetri + public :: stdlib_whetri_rook + public :: stdlib_whetrs + public :: stdlib_whetrs2 + public :: stdlib_whetrs_3 + public :: stdlib_whetrs_aa + public :: stdlib_whetrs_rook + public :: stdlib_whfrk + public :: stdlib_whgeqz + public :: stdlib_whpcon + public :: stdlib_whpev + public :: stdlib_whpevd + public :: stdlib_whpevx + public :: stdlib_whpgst + public :: stdlib_whpgv + public :: stdlib_whpgvd + public :: stdlib_whpgvx + public :: stdlib_whprfs + public :: stdlib_whpsv + public :: stdlib_whpsvx + public :: stdlib_whptrd + public :: stdlib_whptrf + public :: stdlib_whptri + public :: stdlib_whptrs + public :: stdlib_whsein + public :: stdlib_whseqr + public :: stdlib_wla_gbamv + public :: stdlib_wla_gbrcond_c + public :: stdlib_wla_gbrpvgrw + public :: stdlib_wla_geamv + public :: stdlib_wla_gercond_c + public :: stdlib_wla_gerpvgrw + public :: stdlib_wla_heamv + public :: stdlib_wla_hercond_c + public :: stdlib_wla_herpvgrw + public :: stdlib_wla_lin_berr + public :: stdlib_wla_porcond_c + public :: stdlib_wla_porpvgrw + public :: stdlib_wla_syamv + public :: stdlib_wla_syrcond_c + public :: stdlib_wla_syrpvgrw + public :: stdlib_wla_wwaddw + public :: stdlib_wlabrd + public :: stdlib_wlacgv + public :: stdlib_wlacn2 + public :: stdlib_wlacon + public :: stdlib_wlacp2 + public :: stdlib_wlacpy + public :: stdlib_wlacrm + public :: stdlib_wlacrt + public :: stdlib_wladiv + public :: stdlib_wlaed0 + public :: stdlib_wlaed7 + public :: stdlib_wlaed8 + public :: stdlib_wlaein + public :: stdlib_wlaesy + public :: stdlib_wlaev2 + public :: stdlib_wlag2c + public :: stdlib_wlags2 + public :: stdlib_wlagtm + public :: stdlib_wlahef + public :: stdlib_wlahef_aa + public :: stdlib_wlahef_rk + public :: stdlib_wlahef_rook + public :: stdlib_wlahqr + public :: stdlib_wlahr2 + public :: stdlib_wlaic1 + public :: stdlib_wlals0 + public :: stdlib_wlalsa + public :: stdlib_wlalsd + public :: stdlib_wlamswlq + public :: stdlib_wlamtsqr + public :: stdlib_wlangb + public :: stdlib_wlange + public :: stdlib_wlangt + public :: stdlib_wlanhb + public :: stdlib_wlanhe + public :: stdlib_wlanhf + public :: stdlib_wlanhp + public :: stdlib_wlanhs + public :: stdlib_wlanht + public :: stdlib_wlansb + public :: stdlib_wlansp + public :: stdlib_wlansy + public :: stdlib_wlantb + public :: stdlib_wlantp + public :: stdlib_wlantr + public :: stdlib_wlapll + public :: stdlib_wlapmr + public :: stdlib_wlapmt + public :: stdlib_wlaqgb + public :: stdlib_wlaqge + public :: stdlib_wlaqhb + public :: stdlib_wlaqhe + public :: stdlib_wlaqhp + public :: stdlib_wlaqp2 + public :: stdlib_wlaqps + public :: stdlib_wlaqr0 + public :: stdlib_wlaqr1 + public :: stdlib_wlaqr2 + public :: stdlib_wlaqr3 + public :: stdlib_wlaqr4 + public :: stdlib_wlaqr5 + public :: stdlib_wlaqsb + public :: stdlib_wlaqsp + public :: stdlib_wlaqsy + public :: stdlib_wlaqz0 + public :: stdlib_wlaqz1 + public :: stdlib_wlaqz2 + public :: stdlib_wlaqz3 + public :: stdlib_wlar1v + public :: stdlib_wlar2v + public :: stdlib_wlarcm + public :: stdlib_wlarf + public :: stdlib_wlarfb + public :: stdlib_wlarfb_gett + public :: stdlib_wlarfg + public :: stdlib_wlarfgp + public :: stdlib_wlarft + public :: stdlib_wlarfx + public :: stdlib_wlarfy + public :: stdlib_wlargv + public :: stdlib_wlarnv + public :: stdlib_wlarrv + public :: stdlib_wlartg + public :: stdlib_wlartv + public :: stdlib_wlarz + public :: stdlib_wlarzb + public :: stdlib_wlarzt + public :: stdlib_wlascl + public :: stdlib_wlaset + public :: stdlib_wlasr + public :: stdlib_wlassq + public :: stdlib_wlaswlq + public :: stdlib_wlaswp + public :: stdlib_wlasyf + public :: stdlib_wlasyf_aa + public :: stdlib_wlasyf_rk + public :: stdlib_wlasyf_rook + public :: stdlib_wlat2c + public :: stdlib_wlatbs + public :: stdlib_wlatdf + public :: stdlib_wlatps + public :: stdlib_wlatrd + public :: stdlib_wlatrs + public :: stdlib_wlatrz + public :: stdlib_wlatsqr + public :: stdlib_wlaunhr_col_getrfnp + public :: stdlib_wlaunhr_col_getrfnp2 + public :: stdlib_wlauu2 + public :: stdlib_wlauum + public :: stdlib_wpbcon + public :: stdlib_wpbequ + public :: stdlib_wpbrfs + public :: stdlib_wpbstf + public :: stdlib_wpbsv + public :: stdlib_wpbsvx + public :: stdlib_wpbtf2 + public :: stdlib_wpbtrf + public :: stdlib_wpbtrs + public :: stdlib_wpftrf + public :: stdlib_wpftri + public :: stdlib_wpftrs + public :: stdlib_wpocon + public :: stdlib_wpoequ + public :: stdlib_wpoequb + public :: stdlib_wporfs + public :: stdlib_wposv + public :: stdlib_wposvx + public :: stdlib_wpotf2 + public :: stdlib_wpotrf + public :: stdlib_wpotrf2 + public :: stdlib_wpotri + public :: stdlib_wpotrs + public :: stdlib_wppcon + public :: stdlib_wppequ + public :: stdlib_wpprfs + public :: stdlib_wppsv + public :: stdlib_wppsvx + public :: stdlib_wpptrf + public :: stdlib_wpptri + public :: stdlib_wpptrs + public :: stdlib_wpstf2 + public :: stdlib_wpstrf + public :: stdlib_wptcon + public :: stdlib_wpteqr + public :: stdlib_wptrfs + public :: stdlib_wptsv + public :: stdlib_wptsvx + public :: stdlib_wpttrf + public :: stdlib_wpttrs + public :: stdlib_wptts2 + public :: stdlib_wrot + public :: stdlib_wspcon + public :: stdlib_wspmv + public :: stdlib_wspr + public :: stdlib_wsprfs + public :: stdlib_wspsv + public :: stdlib_wspsvx + public :: stdlib_wsptrf + public :: stdlib_wsptri + public :: stdlib_wsptrs + public :: stdlib_wstedc + public :: stdlib_wstegr + public :: stdlib_wstein + public :: stdlib_wstemr + public :: stdlib_wsteqr + public :: stdlib_wsycon + public :: stdlib_wsycon_rook + public :: stdlib_wsyconv + public :: stdlib_wsyconvf + public :: stdlib_wsyconvf_rook + public :: stdlib_wsyequb + public :: stdlib_wsymv + public :: stdlib_wsyr + public :: stdlib_wsyrfs + public :: stdlib_wsysv + public :: stdlib_wsysv_aa + public :: stdlib_wsysv_rk + public :: stdlib_wsysv_rook + public :: stdlib_wsysvx + public :: stdlib_wsyswapr + public :: stdlib_wsytf2 + public :: stdlib_wsytf2_rk + public :: stdlib_wsytf2_rook + public :: stdlib_wsytrf + public :: stdlib_wsytrf_aa + public :: stdlib_wsytrf_rk + public :: stdlib_wsytrf_rook + public :: stdlib_wsytri + public :: stdlib_wsytri_rook + public :: stdlib_wsytrs + public :: stdlib_wsytrs2 + public :: stdlib_wsytrs_3 + public :: stdlib_wsytrs_aa + public :: stdlib_wsytrs_rook + public :: stdlib_wtbcon + public :: stdlib_wtbrfs + public :: stdlib_wtbtrs + public :: stdlib_wtfsm + public :: stdlib_wtftri + public :: stdlib_wtfttp + public :: stdlib_wtfttr + public :: stdlib_wtgevc + public :: stdlib_wtgex2 + public :: stdlib_wtgexc + public :: stdlib_wtgsen + public :: stdlib_wtgsja + public :: stdlib_wtgsna + public :: stdlib_wtgsy2 + public :: stdlib_wtgsyl + public :: stdlib_wtpcon + public :: stdlib_wtplqt + public :: stdlib_wtplqt2 + public :: stdlib_wtpmlqt + public :: stdlib_wtpmqrt + public :: stdlib_wtpqrt + public :: stdlib_wtpqrt2 + public :: stdlib_wtprfb + public :: stdlib_wtprfs + public :: stdlib_wtptri + public :: stdlib_wtptrs + public :: stdlib_wtpttf + public :: stdlib_wtpttr + public :: stdlib_wtrcon + public :: stdlib_wtrevc + public :: stdlib_wtrevc3 + public :: stdlib_wtrexc + public :: stdlib_wtrrfs + public :: stdlib_wtrsen + public :: stdlib_wtrsna + public :: stdlib_wtrsyl + public :: stdlib_wtrti2 + public :: stdlib_wtrtri + public :: stdlib_wtrtrs + public :: stdlib_wtrttf + public :: stdlib_wtrttp + public :: stdlib_wtzrzf + public :: stdlib_wunbdb + public :: stdlib_wunbdb1 + public :: stdlib_wunbdb2 + public :: stdlib_wunbdb3 + public :: stdlib_wunbdb4 + public :: stdlib_wunbdb5 + public :: stdlib_wunbdb6 + public :: stdlib_wuncsd + public :: stdlib_wuncsd2by1 + public :: stdlib_wung2l + public :: stdlib_wung2r + public :: stdlib_wungbr + public :: stdlib_wunghr + public :: stdlib_wungl2 + public :: stdlib_wunglq + public :: stdlib_wungql + public :: stdlib_wungqr + public :: stdlib_wungr2 + public :: stdlib_wungrq + public :: stdlib_wungtr + public :: stdlib_wungtsqr + public :: stdlib_wungtsqr_row + public :: stdlib_wunhr_col + public :: stdlib_wunm22 + public :: stdlib_wunm2l + public :: stdlib_wunm2r + public :: stdlib_wunmbr + public :: stdlib_wunmhr + public :: stdlib_wunml2 + public :: stdlib_wunmlq + public :: stdlib_wunmql + public :: stdlib_wunmqr + public :: stdlib_wunmr2 + public :: stdlib_wunmr3 + public :: stdlib_wunmrq + public :: stdlib_wunmrz + public :: stdlib_wunmtr + public :: stdlib_wupgtr + public :: stdlib_wupmtr + + ! 128-bit real constants + real(qp), parameter, private :: negone = -1.00_qp + real(qp), parameter, private :: zero = 0.00_qp + real(qp), parameter, private :: half = 0.50_qp + real(qp), parameter, private :: one = 1.00_qp + real(qp), parameter, private :: two = 2.00_qp + real(qp), parameter, private :: three = 3.00_qp + real(qp), parameter, private :: four = 4.00_qp + real(qp), parameter, private :: eight = 8.00_qp + real(qp), parameter, private :: ten = 10.00_qp + + ! 128-bit complex constants + complex(qp), parameter, private :: czero = ( 0.0_qp,0.0_qp) + complex(qp), parameter, private :: chalf = ( 0.5_qp,0.0_qp) + complex(qp), parameter, private :: cone = ( 1.0_qp,0.0_qp) + complex(qp), parameter, private :: cnegone = (-1.0_qp,0.0_qp) + + ! 128-bit scaling constants + integer, parameter, private :: maxexp = maxexponent(zero) + integer, parameter, private :: minexp = minexponent(zero) + real(qp), parameter, private :: rradix = real(radix(zero),qp) + real(qp), parameter, private :: ulp = epsilon(zero) + real(qp), parameter, private :: eps = ulp*half + real(qp), parameter, private :: safmin = rradix**max(minexp-1,1-maxexp) + real(qp), parameter, private :: safmax = one/safmin + real(qp), parameter, private :: smlnum = safmin/ulp + real(qp), parameter, private :: bignum = safmax*ulp + real(qp), parameter, private :: rtmin = sqrt(smlnum) + real(qp), parameter, private :: rtmax = sqrt(bignum) + + ! 128-bit Blue's scaling constants + ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(qp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(qp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(qp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(qp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + + !> ZLAG2W: converts a COMPLEX matrix, SA, to a COMPLEX*16 matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_wlag2w( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + complex(qp), intent(in) :: sa(ldsa,*) + complex(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_wlag2w + + !> ZBBCSD: computes the CS decomposition of a unitary matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See ZUNCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The unitary matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_wbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & + lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q + ! Array Arguments + real(qp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), rwork(*) + real(qp), intent(inout) :: phi(*), theta(*) + complex(qp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(qp), parameter :: hundred = 100.0_qp + real(qp), parameter :: meighth = -0.125_qp + real(qp), parameter :: piover2 = 1.57079632679489661923132169163975144210_qp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini + real(qp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lrwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lrworkmin = 1 + rwork(1) = lrworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lrworkopt = iv2tsn + q - 1 + lrworkmin = lrworkopt + rwork(1) = lrworkopt + if( lrwork < lrworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_qlamch( 'EPSILON' ) + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_qlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_qlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_qp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_qlartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + else + call stdlib_qlartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + end if + temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) + + b11d(imin) = temp + b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) + temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) + + b21d(imin) = temp + b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_qlartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& + 1), r ) + else if( mu <= nu ) then + call stdlib_qlartgs( b11e( imin ), b11d( imin + 1 ), mu,rwork(iu1cs+imin-1), & + rwork(iu1sn+imin-1) ) + else + call stdlib_qlartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_qlartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& + 1), r ) + else if( nu < mu ) then + call stdlib_qlartgs( b21e( imin ), b21d( imin + 1 ), nu,rwork(iu2cs+imin-1), & + rwork(iu2sn+imin-1) ) + else + call stdlib_qlartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& + imin-1) ) + end if + rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) + rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) + temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) + end if + temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) + b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) + temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) + end if + temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) + b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_qlartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_qlartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_qlartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( mu <= nu ) then + call stdlib_qlartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + else + call stdlib_qlartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + end if + rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) + rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_qlartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( nu < mu ) then + call stdlib_qlartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + else + call stdlib_qlartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + end if + temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) + b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) + temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) + b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) + temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) + b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) + b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) + temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) + b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) + b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_qlartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_qlartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_qlartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_qlartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& + ) + else + call stdlib_qlartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_qlartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & + r ) + else if( nu < mu ) then + call stdlib_qlartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& + ) + else + call stdlib_qlartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) + + end if + rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) + rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) + temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = rwork(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) + end if + temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = rwork(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) + end if + temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) + b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = rwork(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) + temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) + b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = rwork(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_qlartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_qlartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_qlartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + else + call stdlib_qlartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + end if + temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) + + b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) + + b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_wlasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_wlasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_wlasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_wlasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_wlasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_wlasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_wlasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_wlasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_wscal( q, cnegone, v1t(imax,1), ldv1t ) + else + call stdlib_wscal( q, cnegone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_wscal( p, cnegone, u1(1,imax), 1 ) + else + call stdlib_wscal( p, cnegone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_wscal( m-p, cnegone, u2(1,imax), 1 ) + else + call stdlib_wscal( m-p, cnegone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_wscal( m-q, cnegone, v2t(imax,1), ldv2t ) + else + call stdlib_wscal( m-q, cnegone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_wswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_wswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_wswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_wswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_wswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_wswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_wswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_wswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_wbbcsd + + !> ZBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**H + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**H*VT instead of + !> P**H, for given complex input matrices U and VT. When U and VT are + !> the unitary matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by ZGEBRD, then + !> A = (U*Q) * S * (P**H*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**H*C + !> for a given complex input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_wbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: c(ldc,*), u(ldu,*), vt(ldvt,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: hndrth = 0.01_qp + real(qp), parameter :: hndrd = 100.0_qp + real(qp), parameter :: meigth = -0.125_qp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, j, ll, lll, m, maxit, nm1, nm12, nm13, oldll, & + oldm + real(qp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_qlasq1( n, d, e, rwork, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_qlamch( 'EPSILON' ) + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + rwork( i ) = cs + rwork( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_wlasr( 'R', 'V', 'F', nru, n, rwork( 1 ), rwork( n ),u, ldu ) + + if( ncc>0 )call stdlib_wlasr( 'L', 'V', 'F', n, ncc, rwork( 1 ), rwork( n ),c, ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=qp) ) + thresh = max( tol*sminoa, maxitr*n*n*unfl ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*n*n*unfl ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxit = maxitr*n*n + iter = 0 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>maxit )go to 200 + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_wdrot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt,cosr, & + sinr ) + if( nru>0 )call stdlib_wdrot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + + if( ncc>0 )call stdlib_wdrot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_qlas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_qlas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_qlartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + rwork( i-ll+1 ) = cs + rwork( i-ll+1+nm1 ) = sn + rwork( i-ll+1+nm12 ) = oldcs + rwork( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_wlasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_wlasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_wlasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_qlartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_wlasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_wlasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_wlasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_qlartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_qlartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_wlasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_wlasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_wlasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_qlartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + rwork( i-ll ) = cosr + rwork( i-ll+nm1 ) = -sinr + rwork( i-ll+nm12 ) = cosl + rwork( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_wlasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_wlasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_wlasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_wdscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_wswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_wswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_wswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_wbdsqr + + !> ZCGESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> ZCGESV first attempts to factorize the matrix in COMPLEX and use this + !> factorization within an iterative refinement procedure to produce a + !> solution with COMPLEX*16 normwise backward error quality (see below). + !> If the approach fails the method switches to a COMPLEX*16 + !> factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio COMPLEX performance over COMPLEX*16 performance is too + !> small. A reasonable strategy should take the number of right-hand + !> sides and the size of the matrix into account. This might be done + !> with a call to ILAENV in the future. Up to now, we always try + !> iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_wcgesv( n, nrhs, a, lda, ipiv, b, ldb, x, ldx, work,swork, rwork, iter, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(qp), intent(out) :: rwork(*) + complex(dp), intent(out) :: swork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: b(ldb,*) + complex(qp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(qp), parameter :: bwdmax = 1.0e+00_qp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(qp) :: anrm, cte, eps, rnrm, xnrm + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from quad precision to double precision + ! and store the result in sx. + call stdlib_wlag2c( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_zgetrs( 'NO TRANSPOSE', n, nrhs, swork( ptsa ), n, ipiv,swork( ptsx ), & + n, info ) + ! convert sx back to quad precision and update the current + ! iterate. + call stdlib_zlag2w( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_waxpy( n, cone, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_wlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n, nrhs, n, cnegone,a, lda, x, & + ldx, cone, work, n ) + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = cabs1( x( stdlib_iwamax( n, x( 1, i ), 1 ), i ) ) + rnrm = cabs1( work( stdlib_iwamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the stopping + ! criterion, set up the iter flag accordingly and follow up on double + ! precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to quad precision. + call stdlib_wgetrf( n, n, a, lda, ipiv, info ) + if( info/=0 )return + call stdlib_wlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wgetrs( 'NO TRANSPOSE', n, nrhs, a, lda, ipiv, x, ldx,info ) + return + end subroutine stdlib_wcgesv + + !> ZCPOSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> ZCPOSV first attempts to factorize the matrix in COMPLEX and use this + !> factorization within an iterative refinement procedure to produce a + !> solution with COMPLEX*16 normwise backward error quality (see below). + !> If the approach fails the method switches to a COMPLEX*16 + !> factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio COMPLEX performance over COMPLEX*16 performance is too + !> small. A reasonable strategy should take the number of right-hand + !> sides and the size of the matrix into account. This might be done + !> with a call to ILAENV in the future. Up to now, we always try + !> iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_wcposv( uplo, n, nrhs, a, lda, b, ldb, x, ldx, work,swork, rwork, iter, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(dp), intent(out) :: swork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: b(ldb,*) + complex(qp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(qp), parameter :: bwdmax = 1.0e+00_qp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(qp) :: anrm, cte, eps, rnrm, xnrm + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from quad precision to double precision + ! and store the result in sx. + call stdlib_wlag2c( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_zpotrs( uplo, n, nrhs, swork( ptsa ), n, swork( ptsx ), n,info ) + ! convert sx back to quad precision and update the current + ! iterate. + call stdlib_zlag2w( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_waxpy( n, cone, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_wlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_whemm( 'L', uplo, n, nrhs, cnegone, a, lda, x, ldx, cone,work, n ) + + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = cabs1( x( stdlib_iwamax( n, x( 1, i ), 1 ), i ) ) + rnrm = cabs1( work( stdlib_iwamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the + ! stopping criterion, set up the iter flag accordingly and follow + ! up on quad precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to quad precision. + call stdlib_wpotrf( uplo, n, a, lda, info ) + if( info/=0 )return + call stdlib_wlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wpotrs( uplo, n, nrhs, a, lda, x, ldx, info ) + return + end subroutine stdlib_wcposv + + !> ZDRSCL: multiplies an n-element complex vector x by the real scalar + !> 1/a. This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + + pure subroutine stdlib_wdrscl( n, sa, sx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(qp), intent(in) :: sa + ! Array Arguments + complex(qp), intent(inout) :: sx(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + real(qp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 )return + ! get machine parameters + smlnum = stdlib_qlamch( 'S' ) + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! initialize the denominator to sa and the numerator to 1. + cden = sa + cnum = one + 10 continue + cden1 = cden*smlnum + cnum1 = cnum / bignum + if( abs( cden1 )>abs( cnum ) .and. cnum/=zero ) then + ! pre-multiply x by smlnum if cden is large compared to cnum. + mul = smlnum + done = .false. + cden = cden1 + else if( abs( cnum1 )>abs( cden ) ) then + ! pre-multiply x by bignum if cden is small compared to cnum. + mul = bignum + done = .false. + cnum = cnum1 + else + ! multiply x by cnum / cden and return. + mul = cnum / cden + done = .true. + end if + ! scale the vector x by mul + call stdlib_wdscal( n, mul, sx, incx ) + if( .not.done )go to 10 + return + end subroutine stdlib_wdrscl + + !> ZGBBRD: reduces a complex general m-by-n band matrix A to real upper + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> The routine computes B, and optionally forms Q or P**H, or computes + !> Q**H*C for a given matrix C. + + pure subroutine stdlib_wgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(qp), intent(out) :: d(*), e(*), rwork(*) + complex(qp), intent(inout) :: ab(ldab,*), c(ldc,*) + complex(qp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& + mu0, nr, nrt + real(qp) :: abst, rc + complex(qp) :: ra, rb, rs, t + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the complex sines of the plane rotations are stored in work, + ! and the real cosines in rwork. + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_wlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + rwork( j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_wlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_wlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & + work( i+ml-1 ), ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = rwork( j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_wlargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + rwork( j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_wlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_wlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & + i+mu-1 ), work( i+mu-1 ), ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_wrot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,rwork( i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**h + do j = j1, j2, kb1 + call stdlib_wrot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, rwork(& + j+kun ),conjg( work( j+kun ) ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to complex lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, overwriting superdiagonal + ! elements on subdiagonal elements + do i = 1, min( m-1, n ) + call stdlib_wlartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + ab( 1, i ) = ra + if( i0 .and. m1 ) then + rb = -conjg( rs )*ab( ku, i ) + ab( ku, i ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_wrot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, & + conjg( rs ) ) + end do + end if + end if + ! make diagonal and superdiagonal elements real, storing them in d + ! and e + t = ab( ku+1, 1 ) + loop_120: do i = 1, minmn + abst = abs( t ) + d( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( wantq )call stdlib_wscal( m, t, q( 1, i ), 1 ) + if( wantc )call stdlib_wscal( ncc, conjg( t ), c( i, 1 ), ldc ) + if( i ZGBCON: estimates the reciprocal of the condition number of a complex + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by ZGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_wgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, rwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(qp) :: ainvnm, scale, smlnum + complex(qp) :: t, zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_waxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_wlatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, rwork, info ) + else + ! multiply by inv(u**h). + call stdlib_wlatbs( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',normin, n, kl+ku, & + ab, ldab, work, scale, rwork,info ) + ! multiply by inv(l**h). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_wdotc( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_iwamax( n, work, 1 ) + if( scale ZGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_wgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(out) :: c(*), r(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: bignum, rcmax, rcmin, smlnum + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab ZGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from ZGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_wgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(out) :: c(*), r(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), cabs1( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_wgbequb + + !> ZGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_wgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv, work, n,info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wgbtrs( transn, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wgbrfs + + !> ZGBSV: computes the solution to a complex system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_wgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb ZGBSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(inout) :: c(*), r(*) + complex(qp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + ! moved setting of info = n+1 so info does not subsequently get + ! overwritten. sven, 17 mar 05. + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(qp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_wlantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_wlangb( norm, n, kl, ku, ab, ldab, rwork ) + rpvgrw = stdlib_wlantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_wlangb( 'M', n, kl, ku, ab, ldab, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_wgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, rwork, info ) + + ! compute the solution matrix x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_wgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZGBTF2: computes an LU factorization of a complex m-by-n band matrix + !> A using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_wscal( km, cone / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_wgeru( km, ju-j, -cone, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_wgbtf2 + + !> ZGBTRF: computes an LU factorization of a complex m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + complex(qp) :: temp + ! Local Arrays + complex(qp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_wgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! czero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = czero + end do + end do + ! czero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = czero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to czero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = czero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to czero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = czero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_iwamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=czero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_wgeru( km, jm-jj, -cone, ab( kv+2, jj ), 1,ab( kv, & + jj+1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_wcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_wlaswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_wlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_wtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, cone, & + ab( kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+1, j+jb )& + , ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -cone, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+kl+1-jb, j+jb ),& + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_wtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, cone, & + ab( kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,work13, ldwork, cone, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -cone, & + work31, ldwork, work13,ldwork, cone, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_wcopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_wgbtrf + + !> ZGBTRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general band matrix A using the LU factorization computed + !> by ZGBTRF. + + pure subroutine stdlib_wgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-cone modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_wswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_wgeru( lm, nrhs, -cone, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, & + 1 ), ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_wtbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b. + do i = 1, nrhs + ! solve u**t * x = b, overwriting b with x. + call stdlib_wtbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_wgemv( 'TRANSPOSE', lm, nrhs, -cone, b( j+1, 1 ),ldb, ab( kd+1, j & + ), 1, cone, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_wswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + else + ! solve a**h * x = b. + do i = 1, nrhs + ! solve u**h * x = b, overwriting b with x. + call stdlib_wtbsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT', n,kl+ku, ab, ldab,& + b( 1, i ), 1 ) + end do + ! solve l**h * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_wlacgv( nrhs, b( j, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', lm, nrhs, -cone,b( j+1, 1 ), ldb, & + ab( kd+1, j ), 1, cone,b( j, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_wswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_wgbtrs + + !> ZGEBAK: forms the right or left eigenvectors of a complex general + !> matrix by backward transformation on the computed eigenvectors of the + !> balanced matrix output by ZGEBAL. + + pure subroutine stdlib_wgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(in) :: scale(*) + complex(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(qp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i ZGEBAL: balances a general complex matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_wgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(out) :: scale(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sclfac = 2.0e+0_qp + real(qp), parameter :: factor = 0.95e+0_qp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(qp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + if( stdlib_qisnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'ZGEBAL', -info ) + return + end if + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_wdscal( n-k+1, g, a( i, k ), lda ) + call stdlib_wdscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_wgebal + + !> ZGEBD2: reduces a complex general m by n matrix A to upper or lower + !> real bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_wgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_wlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=qp) + a( i, i ) = cone + ! apply h(i)**h to a(i:m,i+1:n) from the left + if( i ZGEBRD: reduces a general complex M-by-N matrix A to upper or lower + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_wgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'ZGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=qp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+ib-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_wlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+ib:m,i+ib:n), using + ! an update of the form a := a - v*y**h - x*u**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& + cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) + + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_wgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_wgebrd + + !> ZGECON: estimates the reciprocal of the condition number of a general + !> complex matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by ZGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_wgecon( norm, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, scale, sl, smlnum, su + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_wgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(out) :: c(*), r(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, rcmax, rcmin, smlnum + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from ZGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_wgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(out) :: c(*), r(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log(r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), cabs1( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_wgeequb + + !> ZGEES: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_wgees( jobvs, sort, select, n, a, lda, sdim, w, vs,ldvs, work, lwork, & + rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_w) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantst, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, maxwrk, & + minwrk + real(qp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_wlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_wgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_wgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_wlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_wlascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (cworkspace: none) + ! (rworkspace: none) + call stdlib_wtrsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,s, sep, work( & + iwrk ), lwork-iwrk+1, icond ) + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_wgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_wlascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_wcopy( n, a, lda+1, w, 1 ) + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_wgees + + !> ZGEESX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_qp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_wgeesx( jobvs, sort, select, sense, n, a, lda, sdim, w,vs, ldvs, rconde, & + rcondv, work, lwork, rwork,bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + real(qp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_w) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantsb, wantse, wantsn, wantst, wantsv, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, lwrk, & + maxwrk, minwrk + real(qp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_wlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_wgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_wgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_wlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_wlascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (cworkspace: if sense is not 'n', need 2*sdim*(n-sdim) + ! otherwise, need none ) + ! (rworkspace: none) + call stdlib_wtrsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,icond ) + if( .not.wantsn )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( icond==-14 ) then + ! not enough complex workspace + info = -15 + end if + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_wgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_wlascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_wcopy( n, a, lda+1, w, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_wgeesx + + !> ZGEEV: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_wgeev( jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, irwork, itau, iwrk, k, lwork_trevc, & + maxwrk, minwrk, nout + real(qp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(qp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_wlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_wgebal( 'B', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_wgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_wlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_wlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_wlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'E', 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_whseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need 2*n) + irwork = ibal + n + call stdlib_wtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork( irwork ), n, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_wgebak( 'B', 'L', n, ilo, ihi, rwork( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_qznrm2( n, vl( 1, i ), 1 ) + call stdlib_wdscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vl( k, i ),KIND=qp)**2 +aimag( vl( k, i ) )& + **2 + end do + k = stdlib_iqamax( n, rwork( irwork ), 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_wscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=qp), zero,KIND=qp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_wgebak( 'B', 'R', n, ilo, ihi, rwork( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_qznrm2( n, vr( 1, i ), 1 ) + call stdlib_wdscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vr( k, i ),KIND=qp)**2 +aimag( vr( k, i ) )& + **2 + end do + k = stdlib_iqamax( n, rwork( irwork ), 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_wscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=qp), zero,KIND=qp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_wlascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info>0 ) then + call stdlib_wlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_wgeev + + !> ZGEEVX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_qp of the LAPACK + !> Users' Guide. + + subroutine stdlib_wgeevx( balanc, jobvl, jobvr, sense, n, a, lda, w, vl,ldvl, vr, ldvr, ilo, & + ihi, scale, abnrm, rconde,rcondv, work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(qp), intent(out) :: abnrm + ! Array Arguments + real(qp), intent(out) :: rconde(*), rcondv(*), rwork(*), scale(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(qp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(qp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ) & + .or.stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) ) then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_wlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_wgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_wlange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_qlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_wgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_wlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_wlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_wlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_whseqr( job, 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_whseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need n) + call stdlib_wtrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork, n, ierr ) + end if + ! compute condition numbers if desired + ! (cworkspace: need n*n+2*n unless sense = 'e') + ! (rworkspace: need 2*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_wtrsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, rwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_wgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_qznrm2( n, vl( 1, i ), 1 ) + call stdlib_wdscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vl( k, i ),KIND=qp)**2 +aimag( vl( k, i ) )**2 + end do + k = stdlib_iqamax( n, rwork, 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_wscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=qp), zero,KIND=qp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_wgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_qznrm2( n, vr( 1, i ), 1 ) + call stdlib_wdscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vr( k, i ),KIND=qp)**2 +aimag( vr( k, i ) )**2 + end do + k = stdlib_iqamax( n, rwork, 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_wscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=qp), zero,KIND=qp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_wlascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_qlascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_wlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_wgeevx + + !> ZGEHD2: reduces a complex general matrix A to upper Hessenberg form H + !> by a unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_wgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda ZGEHRD: reduces a complex general matrix A to upper Hessenberg form H by + !> an unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_wgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + complex(qp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin + tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**h + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_wlahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**h. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = cone + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',ihi, ihi-i-ib+1,ib, -& + cone, work, ldwork, a( i+ib, i ), lda, cone,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', i, ib-1,cone, & + a( i+1, i ), lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_waxpy( i, -cone, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_wlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, & + n-i-ib+1, ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, & + ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_wgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_wgehrd + + !> ZGEJSV: computes the singular value decomposition (SVD) of a complex M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^*, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and + !> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + + pure subroutine stdlib_wgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) + real(qp), intent(out) :: sva(n), rwork(lrwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + + ! Local Scalars + complex(qp) :: ctemp + real(qp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & + l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + integer(ilp) :: optwrk, minwrk, minrwrk, miniwrk + integer(ilp) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & + lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff + integer(ilp) :: lwrk_wgelqf, lwrk_wgeqp3, lwrk_wgeqp3n, lwrk_wgeqrf, lwrk_wgesvj, & + lwrk_wgesvjv, lwrk_wgesvju, lwrk_wunmlq, lwrk_wunmqr, lwrk_wunmqrm + ! Local Arrays + complex(qp) :: cdummy(1) + real(qp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,log,max,min,real,nint,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & + .and. rsvec .and. l2tran ) ) ) then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & + .and. lsvec .and. l2tran ) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else + ! #:) + info = 0 + end if + if ( info == 0 ) then + ! Compute The Minimal And The Optimal Workspace Lengths + ! [[the expressions for computing the minimal and the optimal + ! values of lcwork, lrwork are written with a lot of redundancy and + ! can be simplified. however, this verbose form is useful for + ! maintenance and modifications of the code.]] + ! .. minimal workspace length for stdlib_wgeqp3 of an m x n matrix, + ! stdlib_wgeqrf of an n x n matrix, stdlib_wgelqf of an n x n matrix, + ! stdlib_wunmlq for computing n x n matrix, stdlib_wunmqr for computing n x n + ! matrix, stdlib_wunmqr for computing m x n matrix, respectively. + lwqp3 = n+1 + lwqrf = max( 1, n ) + lwlqf = max( 1, n ) + lwunmlq = max( 1, n ) + lwunmqr = max( 1, n ) + lwunmqrm = max( 1, m ) + ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix + lwcon = 2 * n + ! .. minimal workspace length for stdlib_wgesvj of an n x n matrix, + ! without and with explicit accumulation of jacobi rotations + lwsvdj = max( 2 * n, 1 ) + lwsvdjv = max( 2 * n, 1 ) + ! .. minimal real workspace length for stdlib_wgeqp3, stdlib_wpocon, stdlib_wgesvj + lrwqp3 = 2 * n + lrwcon = n + lrwsvdj = n + if ( lquery ) then + call stdlib_wgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,rdummy, ierr ) + + lwrk_wgeqp3 = real( cdummy(1),KIND=qp) + call stdlib_wgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_wgeqrf = real( cdummy(1),KIND=qp) + call stdlib_wgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_wgelqf = real( cdummy(1),KIND=qp) + end if + minwrk = 2 + optwrk = 2 + miniwrk = n + if ( .not. (lsvec .or. rsvec ) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If + ! only the singular values are requested + if ( errest ) then + minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj ) + else + minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) + end if + if ( lquery ) then + call stdlib_wgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1,& + rdummy, -1, ierr ) + lwrk_wgesvj = real( cdummy(1),KIND=qp) + if ( errest ) then + optwrk = max( n+lwrk_wgeqp3, n**2+lwcon,n+lwrk_wgeqrf, lwrk_wgesvj ) + + else + optwrk = max( n+lwrk_wgeqp3, n+lwrk_wgeqrf,lwrk_wgesvj ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( rsvec .and. (.not.lsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the right singular vectors are requested + if ( errest ) then + minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2*n+lwqrf, n+lwsvdj, n+& + lwunmlq ) + else + minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,n+lwsvdj, n+lwunmlq ) + + end if + if ( lquery ) then + call stdlib_wgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_wgesvj = real( cdummy(1),KIND=qp) + call stdlib_wunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1, & + ierr ) + lwrk_wunmlq = real( cdummy(1),KIND=qp) + if ( errest ) then + optwrk = max( n+lwrk_wgeqp3, lwcon, lwrk_wgesvj,n+lwrk_wgelqf, 2*n+& + lwrk_wgeqrf,n+lwrk_wgesvj, n+lwrk_wunmlq ) + else + optwrk = max( n+lwrk_wgeqp3, lwrk_wgesvj,n+lwrk_wgelqf,2*n+lwrk_wgeqrf, n+& + lwrk_wgesvj,n+lwrk_wunmlq ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( lsvec .and. (.not.rsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the left singular vectors are requested + if ( errest ) then + minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) + else + minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) + end if + if ( lquery ) then + call stdlib_wgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_wgesvj = real( cdummy(1),KIND=qp) + call stdlib_wunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_wunmqrm = real( cdummy(1),KIND=qp) + if ( errest ) then + optwrk = n + max( lwrk_wgeqp3, lwcon, n+lwrk_wgeqrf,lwrk_wgesvj, & + lwrk_wunmqrm ) + else + optwrk = n + max( lwrk_wgeqp3, n+lwrk_wgeqrf,lwrk_wgesvj, lwrk_wunmqrm ) + + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! full svd is requested + if ( .not. jracc ) then + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, n+& + lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, & + n+lwunmqrm ) + end if + miniwrk = miniwrk + n + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+& + lwunmqr,n+lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,n+& + lwunmqrm ) + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + end if + if ( lquery ) then + call stdlib_wunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_wunmqrm = real( cdummy(1),KIND=qp) + call stdlib_wunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_wunmqr = real( cdummy(1),KIND=qp) + if ( .not. jracc ) then + call stdlib_wgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,rdummy, ierr ) + + lwrk_wgeqp3n = real( cdummy(1),KIND=qp) + call stdlib_wgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_wgesvj = real( cdummy(1),KIND=qp) + call stdlib_wgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_wgesvju = real( cdummy(1),KIND=qp) + call stdlib_wgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_wgesvjv = real( cdummy(1),KIND=qp) + call stdlib_wunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& + 1, ierr ) + lwrk_wunmlq = real( cdummy(1),KIND=qp) + if ( errest ) then + optwrk = max( n+lwrk_wgeqp3, n+lwcon,2*n+n**2+lwcon, 2*n+lwrk_wgeqrf,& + 2*n+lwrk_wgeqp3n,2*n+n**2+n+lwrk_wgelqf,2*n+n**2+n+n**2+lwcon,2*n+& + n**2+n+lwrk_wgesvj,2*n+n**2+n+lwrk_wgesvjv,2*n+n**2+n+lwrk_wunmqr,2*n+& + n**2+n+lwrk_wunmlq,n+n**2+lwrk_wgesvju,n+lwrk_wunmqrm ) + else + optwrk = max( n+lwrk_wgeqp3,2*n+n**2+lwcon, 2*n+lwrk_wgeqrf,2*n+& + lwrk_wgeqp3n,2*n+n**2+n+lwrk_wgelqf,2*n+n**2+n+n**2+lwcon,2*n+n**2+n+& + lwrk_wgesvj,2*n+n**2+n+lwrk_wgesvjv,2*n+n**2+n+lwrk_wunmqr,2*n+n**2+n+& + lwrk_wunmlq,n+n**2+lwrk_wgesvju,n+lwrk_wunmqrm ) + end if + else + call stdlib_wgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_wgesvjv = real( cdummy(1),KIND=qp) + call stdlib_wunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& + -1, ierr ) + lwrk_wunmqr = real( cdummy(1),KIND=qp) + call stdlib_wunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& + 1, ierr ) + lwrk_wunmqrm = real( cdummy(1),KIND=qp) + if ( errest ) then + optwrk = max( n+lwrk_wgeqp3, n+lwcon,2*n+lwrk_wgeqrf, 2*n+n**2,2*n+& + n**2+lwrk_wgesvjv,2*n+n**2+n+lwrk_wunmqr,n+lwrk_wunmqrm ) + else + optwrk = max( n+lwrk_wgeqp3, 2*n+lwrk_wgeqrf,2*n+n**2, 2*n+n**2+& + lwrk_wgesvjv,2*n+n**2+n+lwrk_wunmqr,n+lwrk_wunmqrm ) + end if + end if + end if + if ( l2tran .or. rowpiv ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + end if + end if + minwrk = max( 2, minwrk ) + optwrk = max( minwrk, optwrk ) + if ( lwork < minwrk .and. (.not.lquery) ) info = - 17 + if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19 + end if + if ( info /= 0 ) then + ! #:( + call stdlib_xerbla( 'ZGEJSV', - info ) + return + else if ( lquery ) then + cwork(1) = optwrk + cwork(2) = minwrk + rwork(1) = minrwrk + iwork(1) = max( 4, miniwrk ) + return + end if + ! quick return for void matrix (y3k safe) + ! #:) + if ( ( m == 0 ) .or. ( n == 0 ) ) then + iwork(1:4) = 0 + rwork(1:7) = 0 + return + endif + ! determine whether the matrix u should be m x n or m x m + if ( lsvec ) then + n1 = n + if ( stdlib_lsame( jobu, 'F' ) ) n1 = m + end if + ! set numerical parameters + ! ! note: make sure stdlib_qlamch() does not fail on the target architecture. + epsln = stdlib_qlamch('EPSILON') + sfmin = stdlib_qlamch('SAFEMINIMUM') + small = sfmin / epsln + big = stdlib_qlamch('O') + ! big = one / sfmin + ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n + ! (!) if necessary, scale sva() to protect the largest norm from + ! overflow. it is possible that this scaling pushes the smallest + ! column norm left from the underflow threshold (extreme case). + scalem = one / sqrt(real(m,KIND=qp)*real(n,KIND=qp)) + noscal = .true. + goscal = .true. + do p = 1, n + aapp = zero + aaqq = one + call stdlib_wlassq( m, a(1,p), 1, aapp, aaqq ) + if ( aapp > big ) then + info = - 9 + call stdlib_xerbla( 'ZGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_qscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_wlaset( 'G', m, n1, czero, cone, u, ldu ) + if ( rsvec ) call stdlib_wlaset( 'G', n, n, czero, cone, v, ldv ) + rwork(1) = one + rwork(2) = one + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + iwork(4) = -1 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill nonzero columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_wlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_wlacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_wgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) + call stdlib_wungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) + call stdlib_wcopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = cone + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + rwork(1) = one / scalem + rwork(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + iwork(4) = -1 + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + return + end if + transp = .false. + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^* and a^* * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_wlassq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_wlassq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + rwork(m+p) = xsc * scalem + rwork(p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, rwork(p) ) + if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) + end do + else + do p = 1, m + rwork(m+p) = scalem*abs( a(p,stdlib_iwamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, rwork(m+p) ) + aatmin = min( aatmin, rwork(m+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^* would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_qlassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=qp)) + ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^* * a. do the same with the + ! diagonal of a * a^*, compute the entropy of the corresponding + ! probability distribution. note that a * a^* and a^* * a have the + ! same trace. + entrat = zero + do p = 1, m + big1 = ( ( rwork(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=qp)) + ! analyze the entropies and decide a or a^*. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^* is better than a, take the adjoint of a. this is allowed + ! only for square matrices, m=n. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + ctemp = conjg(a(q,p)) + a(q,p) = conjg(a(p,q)) + a(p,q) = ctemp + end do + end do + a(n,n) = conjg(a(n,n)) + do p = 1, n + rwork(m+p) = sva(p) + sva(p) = rwork(p) + ! previously computed row 2-norms are now column 2-norms + ! of the transposed matrix + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_wgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_wgesvj will compute them. so, in that case, + ! one should use stdlib_wgesvj instead of stdlib_wgejsv. + ! >> change in the april 2016 update: allow bigger range, i.e. the + ! largest column is allowed up to big/n and stdlib_wgesvj will do the rest. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=qp) ) + ! temp1 = big/real(n,KIND=qp) + call stdlib_qlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_wlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_wgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=qp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_wlacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_wdscal( p, one/temp1, v(1,p), 1 ) + end do + if ( lsvec )then + call stdlib_wpocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_wpocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) + + end if + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_wlacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_wdscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_wpocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_wlacpy( 'U', n, n, a, lda, cwork, n ) + ! [] call stdlib_wlacpy( 'u', n, n, a, lda, cwork(n+1), n ) + ! change: here index shifted by n to the left, cwork(1:n) + ! not needed for sigma only computation + do p = 1, n + temp1 = sva(iwork(p)) + ! [] call stdlib_wdscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) + call stdlib_wdscal( p, one/temp1, cwork((p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + ! [] call stdlib_wpocon( 'u', n, cwork(n+1), n, one, temp1, + ! [] $ cwork(n+n*n+1), rwork, ierr ) + call stdlib_wpocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) + + end if + if ( temp1 /= zero ) then + sconda = one / sqrt(temp1) + else + sconda = - one + end if + ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_wcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_wlacgv( n-p+1, a(p,p), 1 ) + end do + if ( nr == n ) a(n,n) = conjg(a(n,n)) + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=qp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=qp) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_wlaset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_wgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_wcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_wlacgv( nr-p+1, a(p,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=qp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=qp) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_wlaset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_wgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & + .not. lsvec ) .and. ( nr /= n ) ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_wcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_wlacgv( n-p+1, v(p,p), 1 ) + end do + call stdlib_wlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_wgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_wlaset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda ) + call stdlib_wgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) + call stdlib_wlacpy( 'L', nr, nr, a, lda, v, ldv ) + call stdlib_wlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_wgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_wcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + call stdlib_wlacgv( nr-p+1, v(p,p), 1 ) + end do + call stdlib_wlaset('U', nr-1, nr-1, czero, czero, v(1,2), ldv) + call stdlib_wgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & + lwork-n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_wlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv ) + call stdlib_wlaset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv ) + call stdlib_wlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) + end if + call stdlib_wunmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & + ierr ) + end if + ! Permute The Rows Of V + ! do 8991 p = 1, n + ! call stdlib_wcopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + 8991 continue + ! call stdlib_wlacpy( 'all', n, n, a, lda, v, ldv ) + call stdlib_wlapmr( .false., n, n, v, ldv, iwork ) + if ( transp ) then + call stdlib_wlacpy( 'A', n, n, v, ldv, u, ldu ) + end if + else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then + call stdlib_wlaset( 'L', n-1,n-1, czero, czero, a(2,1), lda ) + call stdlib_wgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & + lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + call stdlib_wlapmr( .false., n, n, v, ldv, iwork ) + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_wcopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + call stdlib_wlacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_wlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_wgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr - 1 + call stdlib_wcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + call stdlib_wlacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_wlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_wgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& + n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < m ) then + call stdlib_wlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_wlaset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu ) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_qznrm2( m, u(1,p), 1 ) + call stdlib_wdscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_wlacpy( 'A', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_wgejsv. + do p = 1, nr + call stdlib_wcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_wlacgv( n-p+1, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=qp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_wlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_wlacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_qznrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1) + call stdlib_wdscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1) + end do + call stdlib_wpocon('L',nr,cwork(2*n+1),nr,one,temp1,cwork(2*n+nr*nr+1),rwork,& + ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=qp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=qp)) + cond_ok = sqrt(sqrt(real(nr,KIND=qp))) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^* = q2 * r2 + call stdlib_wgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=qp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + if ( nr /= n )call stdlib_wlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_wcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + call stdlib_wlacgv(nr-p+1, v(p,p), 1 ) + end do + v(nr,nr)=conjg(v(nr,nr)) + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_wgeqp3 + ! should be replaced with eg. call zgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^* * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_wgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2*n+1), lwork-& + 2*n, rwork, ierr ) + ! * call stdlib_wgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=qp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + call stdlib_wlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=qp) + ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_wlaset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_wgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),cwork(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_wlacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_qznrm2( p, cwork(2*n+n*nr+nr+p), nr ) + call stdlib_wdscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr ) + end do + call stdlib_wpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,cwork(2*n+n*nr+& + nr+nr*nr+1),rwork,ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_wlacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + ctemp = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_wlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_wgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_wcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_wdscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_wgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_wtrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) + else + ! .. r1 is well conditioned, but non-square. adjoint of r2 + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_wgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_wtrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_wlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_wlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_wlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_wunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(& + 2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^* * v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_wgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_wcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_wdscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_wtrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_wlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_wlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_wlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_wunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_wgejsv completes the task. + ! compute the full svd of l3 using stdlib_wgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_wgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_wlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_wlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_wlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_wunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_wunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,cwork(2*n+n*nr+1), & + u, ldu, cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=qp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_qznrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_wdscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_wlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_wlaset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=qp)) * epsln + do p = 1, nr + xsc = one / stdlib_qznrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_wdscal( m, xsc,& + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_wlacpy( 'U', n, n, a, lda, cwork(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + ctemp = xsc * cwork( n + (p-1)*n + p ) + do q = 1, p - 1 + ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / + ! $ abs(cwork(n+(p-1)*n+q)) ) + cwork(n+(q-1)*n+p)=-ctemp + end do + end do + else + call stdlib_wlaset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) + end if + call stdlib_wgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& + n*n+1), lwork-n-n*n, rwork, lrwork,info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, n + call stdlib_wcopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_wdscal( n, sva(p), cwork(n+(p-1)*n+1), 1 ) + end do + call stdlib_wtrsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) + do p = 1, n + call stdlib_wcopy( n, cwork(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=qp))*epsln + do p = 1, n + xsc = one / stdlib_qznrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_wdscal( n, xsc,& + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_wlaset( 'A', m-n, n, czero, czero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_wlaset('A',n, n1-n, czero, czero, u(1,n+1),ldu) + call stdlib_wlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) + end if + end if + call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + temp1 = sqrt(real(m,KIND=qp))*epsln + do p = 1, n1 + xsc = one / stdlib_qznrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_wdscal( m, xsc,& + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values, e.g. when the singular values spread from + ! the underflow to the overflow threshold. + do p = 1, nr + call stdlib_wcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_wlacgv( n-p+1, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=qp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_wlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + call stdlib_wgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + call stdlib_wlacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n ) + do p = 1, nr + call stdlib_wcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + call stdlib_wlacgv( nr-p+1, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=qp) + ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) + u(p,q) = - ctemp + end do + end do + else + call stdlib_wlaset('U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + end if + call stdlib_wgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2*n+n*nr+1),& + lwork-2*n-n*nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_wlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_wlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_wlaset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) + end if + call stdlib_wunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+n*nr+& + nr+1),lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=qp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_qznrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_wdscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_wlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu) + call stdlib_wlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) + end if + end if + call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^* + do p = 1, n + call stdlib_wswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_qlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + rwork(1) = uscal2 * scalem + rwork(2) = uscal1 + if ( errest ) rwork(3) = sconda + if ( lsvec .and. rsvec ) then + rwork(4) = condr1 + rwork(5) = condr2 + end if + if ( l2tran ) then + rwork(6) = entra + rwork(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + if ( transp ) then + iwork(4) = 1 + else + iwork(4) = -1 + end if + return + end subroutine stdlib_wgejsv + + !> ZGELQ: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_wgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'ZGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'ZGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if ( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_wgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_wlaswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_wgelq + + !> ZGELQ2: computes an LQ factorization of a complex m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_wgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGELQF: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_wgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb ZGELQT: computes a blocked LQ factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_wgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. (mb>min(m,n) .and. min(m,n)>0 ))then + info = -3 + else if( lda ZGELQT3: recursively computes a LQ factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_wgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_wlarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + t(1,1)=conjg(t(1,1)) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_wgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = a(j1:m,1:n) q1^h [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_wtrmm( 'R', 'U', 'C', 'U', m2, m1, cone,a, lda, t( i1, 1 ), ldt ) + + call stdlib_wgemm( 'N', 'C', m2, m1, n-m1, cone, a( i1, i1 ), lda,a( 1, i1 ), lda, & + cone, t( i1, 1 ), ldt) + call stdlib_wtrmm( 'R', 'U', 'N', 'N', m2, m1, cone,t, ldt, t( i1, 1 ), ldt ) + + call stdlib_wgemm( 'N', 'N', m2, n-m1, m1, -cone, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + cone, a( i1, i1 ), lda ) + call stdlib_wtrmm( 'R', 'U', 'N', 'U', m2, m1 , cone,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )= czero + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_wgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_wtrmm( 'R', 'U', 'C', 'U', m1, m2, cone,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_wgemm( 'N', 'C', m1, m2, n-m, cone, a( 1, j1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, i1 ), ldt ) + call stdlib_wtrmm( 'L', 'U', 'N', 'N', m1, m2, -cone, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_wtrmm( 'R', 'U', 'N', 'N', m1, m2, cone,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_wgelqt3 + + !> ZGELS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, or its conjugate-transpose, using a QR + !> or LQ factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an underdetermined system A**H * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**H * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_wgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(qp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(qp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMQR', 'LC', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMLQ', 'LC', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn+max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=qp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_wlaset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'P' ) + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_wlange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_wlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_wlange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_wgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) + call stdlib_wunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_wtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) + call stdlib_wtrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& + ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_wunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_wgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_wtrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) + call stdlib_wunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**h * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_wunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) + call stdlib_wtrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & + b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_wlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_wlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=qp) + return + end subroutine stdlib_wgels + + !> ZGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_wgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), s(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & + maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz + real(qp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: int,log,max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + smlsiz = stdlib_ilaenv( 9, 'ZGELSD', ' ', 0, 0, 0, 0 ) + mnthr = stdlib_ilaenv( 6, 'ZGELSD', ' ', m, n, nrhs, -1 ) + nlvl = max( int( log( real( minmn,KIND=qp) / real( smlsiz + 1,KIND=qp) ) /log( & + two ),KIND=ilp) + 1, 0 ) + liwork = 3*minmn*nlvl + 11*minmn + mm = m + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns. + mm = n + maxwrk = max( maxwrk, n*stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n,-1, -1 ) ) + + maxwrk = max( maxwrk, nrhs*stdlib_ilaenv( 1, 'ZUNMQR', 'LC', m,nrhs, n, -1 ) ) + + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + lrwork = 10*n + 2*n*smlsiz + 8*n*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + maxwrk = max( maxwrk, 2*n + ( mm + n )*stdlib_ilaenv( 1,'ZGEBRD', ' ', mm, n, & + -1, -1 ) ) + maxwrk = max( maxwrk, 2*n + nrhs*stdlib_ilaenv( 1, 'ZUNMBR','QLC', mm, nrhs, & + n, -1 ) ) + maxwrk = max( maxwrk, 2*n + ( n - 1 )*stdlib_ilaenv( 1,'ZUNMBR', 'PLN', n, & + nrhs, n, -1 ) ) + maxwrk = max( maxwrk, 2*n + n*nrhs ) + minwrk = max( 2*n + mm, 2*n + n*nrhs ) + end if + if( n>m ) then + lrwork = 10*m + 2*m*smlsiz + 8*m*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1,-1 ) + maxwrk = max( maxwrk, m*m + 4*m + 2*m*stdlib_ilaenv( 1,'ZGEBRD', ' ', m, m,& + -1, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + nrhs*stdlib_ilaenv( 1,'ZUNMBR', 'QLC', m,& + nrhs, m, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + ( m - 1 )*stdlib_ilaenv( 1,'ZUNMLQ', & + 'LC', n, nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m*m + 4*m + m*nrhs ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - underdetermined. + maxwrk = 2*m + ( n + m )*stdlib_ilaenv( 1, 'ZGEBRD', ' ', m,n, -1, -1 ) + + maxwrk = max( maxwrk, 2*m + nrhs*stdlib_ilaenv( 1, 'ZUNMBR','QLC', m, nrhs,& + m, -1 ) ) + maxwrk = max( maxwrk, 2*m + m*stdlib_ilaenv( 1, 'ZUNMBR','PLN', n, nrhs, m,& + -1 ) ) + maxwrk = max( maxwrk, 2*m + m*nrhs ) + end if + minwrk = max( 2*m + n, 2*m + m*nrhs ) + end if + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_wlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_qlaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_wlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_wlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure b(m+1:n,:) = 0 + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (rworkspace: need n) + ! (cworkspace: need n, prefer n*nb) + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (rworkspace: need n) + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_wunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + end if + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ie = 1 + nrwork = ie + n + ! bidiagonalize r in a. + ! (rworkspace: need n) + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + call stdlib_wgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + call stdlib_wunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_wlalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_wunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (cworkspace: need 2*m, prefer m+m*nb) + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_wlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + itauq = il + ldwork*m + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize l in work(il). + ! (rworkspace: need m) + ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) + call stdlib_wgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_wlalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_wunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_wlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_wunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize a. + ! (rworkspace: need m) + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_wlalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_wunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_wlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_wlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + return + end subroutine stdlib_wgelsd + + !> ZGELSS: computes the minimum norm solution to a complex linear + !> least squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_wgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*), s(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_wgeqrf, lwork_wunmqr, lwork_wgebrd, lwork_wunmbr, lwork_wungbr, & + lwork_wunmlq, lwork_wgelqf + real(qp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + complex(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'ZGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_wgeqrf + call stdlib_wgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_wgeqrf = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wunmqr + call stdlib_wunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_wunmqr = real( dum(1),KIND=qp) + mm = n + maxwrk = max( maxwrk, n + n*stdlib_ilaenv( 1, 'ZGEQRF', ' ', m,n, -1, -1 ) ) + + maxwrk = max( maxwrk, n + nrhs*stdlib_ilaenv( 1, 'ZUNMQR', 'LC',m, nrhs, n, -& + 1 ) ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute space needed for stdlib_wgebrd + call stdlib_wgebrd( mm, n, a, lda, s, s, dum(1), dum(1), dum(1),-1, info ) + + lwork_wgebrd = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wunmbr + call stdlib_wunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_wunmbr = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wungbr + call stdlib_wungbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_wungbr = real( dum(1),KIND=qp) + ! compute total workspace needed + maxwrk = max( maxwrk, 2*n + lwork_wgebrd ) + maxwrk = max( maxwrk, 2*n + lwork_wunmbr ) + maxwrk = max( maxwrk, 2*n + lwork_wungbr ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = 2*n + max( nrhs, m ) + end if + if( n>m ) then + minwrk = 2*m + max( nrhs, n ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_wgelqf + call stdlib_wgelqf( m, n, a, lda, dum(1), dum(1),-1, info ) + lwork_wgelqf = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wgebrd + call stdlib_wgebrd( m, m, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_wgebrd = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wunmbr + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_wunmbr = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wungbr + call stdlib_wungbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_wungbr = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wunmlq + call stdlib_wunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_wunmlq = real( dum(1),KIND=qp) + ! compute total workspace needed + maxwrk = m + lwork_wgelqf + maxwrk = max( maxwrk, 3*m + m*m + lwork_wgebrd ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_wunmbr ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_wungbr ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_wunmlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_wgebrd + call stdlib_wgebrd( m, n, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_wgebrd = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wunmbr + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_wunmbr = real( dum(1),KIND=qp) + ! compute space needed for stdlib_wungbr + call stdlib_wungbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_wungbr = real( dum(1),KIND=qp) + maxwrk = 2*m + lwork_wgebrd + maxwrk = max( maxwrk, 2*m + lwork_wunmbr ) + maxwrk = max( maxwrk, 2*m + lwork_wungbr ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_wlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_qlaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) + ! (rworkspace: none) + call stdlib_wunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_wlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + ! (rworkspace: none) + call stdlib_wunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_wungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_wdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_wlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_wgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_wlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_wgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_wlacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_wgemv( 'C', n, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_wcopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=3*m+m*m+max( m, nrhs, n-2*m ) )then + ! underdetermined case, m much less than n + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=3*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: none) + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + ie = 1 + itauq = il + ldwork*m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) + ! (rworkspace: none) + call stdlib_wungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right singular + ! vectors of l in work(il) and multiplying b by transpose of + ! left singular vectors + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, 0, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & + b, ldb, rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_wdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_wlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + iwork = il + m*ldwork + ! multiply b by right singular vectors of l in work(il) + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_wgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & + work( iwork ), ldb ) + call stdlib_wlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_wgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1, i ), & + ldb, czero, work( iwork ), m ) + call stdlib_wlacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_wgemv( 'C', m, m, cone, work( il ), ldwork, b( 1, 1 ),1, czero, work(& + iwork ), 1 ) + call stdlib_wcopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_wlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) + ! (rworkspace: none) + call stdlib_wunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_wunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: none) + call stdlib_wungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'L', m, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_wdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_wlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_wgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_wlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_wgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_wlacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_wgemv( 'C', m, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_wcopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_wlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_wlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_wgelss + + !> ZGELSY: computes the minimum-norm solution to a complex linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by unitary transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**H [ inv(T11)*Q1**H*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + + subroutine stdlib_wgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & + nb4 + real(qp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize + complex(qp) :: c1, c2, s1, s2 + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,max,min + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGERQF', ' ', m, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMQR', ' ', m, n, nrhs, -1 ) + nb4 = stdlib_ilaenv( 1, 'ZUNMRQ', ' ', m, n, nrhs, -1 ) + nb = max( nb1, nb2, nb3, nb4 ) + lwkopt = max( 1, mn+2*n+nb*( n+1 ), 2*mn+nb*nrhs ) + work( 1 ) = cmplx( lwkopt,KIND=qp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_wlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_wlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_wgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, rwork, info ) + + wsize = mn + real( work( mn+1 ),KIND=qp) + ! complex workspace: mn+nb*(n+1). real workspace 2*n. + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = cone + work( ismax ) = cone + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_wlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank ZGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by short wide + !> LQ factorization (ZGELQ) + + pure subroutine stdlib_wgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), t(*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_wgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_wlamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_wgemlq + + !> ZGEMLQT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex unitary matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by ZGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_wgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + complex(qp), intent(in) :: v(ldv,*), t(ldt,*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv ZGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (ZGEQR) + + pure subroutine stdlib_wgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), t(*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_wgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_wlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_wgemqr + + !> ZGEMQRT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by ZGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_wgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + complex(qp), intent(in) :: v(ldv,*), t(ldt,*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv ZGEQL2: computes a QL factorization of a complex m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_wgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQLF: computes a QL factorization of a complex M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_wgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_wlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'BACKWARD','COLUMNWISE', m-& + k+i+ib-1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), & + ldwork ) + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_wgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_wgeqlf + + !> ZGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_wgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_wgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_wgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb ZGEQR: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_wgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min ( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'ZGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'ZGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_wgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_wlatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_wgeqr + + !> ZGEQR2: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_wgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQR2P: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_wgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQRF: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_wgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb ZGEQR2P computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_wgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb ZGEQRT: computes a blocked QR factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_wgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda ZGEQRT2: computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_wgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_wlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i ZGEQRT3: recursively computes a QR factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_wgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_wlarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_wgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_wtrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + call stdlib_wgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1 ), lda,a( j1, j1 ), lda, & + cone, t( 1, j1 ), ldt) + call stdlib_wtrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1, j1 ), ldt ) + + call stdlib_wgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + cone, a( j1, j1 ), lda ) + call stdlib_wtrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_wgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = conjg(a( j+n1, i )) + end do + end do + call stdlib_wtrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_wgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, j1 ), ldt ) + call stdlib_wtrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_wtrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_wgeqrt3 + + !> ZGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_wgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wgetrs( trans, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wgetrs( transt, n, 1, af, ldaf, ipiv, work, n,info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wgetrs( transn, n, 1, af, ldaf, ipiv, work, n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wgerfs + + !> ZGERQ2: computes an RQ factorization of a complex m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_wgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGERQF: computes an RQ factorization of a complex M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_wgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_wlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_wgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_wgerqf + + !> ZGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by ZGETC2. + + pure subroutine stdlib_wgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: bignum, eps, smlnum + complex(qp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx + ! Executable Statements + ! set constant to control overflow + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_wlaswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_iwamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = cmplx( one / two, zero,KIND=qp) / abs( rhs( i ) ) + call stdlib_wscal( n, temp, rhs( 1 ), 1 ) + scale = scale*real( temp,KIND=qp) + end if + do i = n, 1, -1 + temp = cmplx( one, zero,KIND=qp) / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_wlaswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_wgesc2 + + !> ZGESDD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors, by using divide-and-conquer method. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**H, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_wgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), s(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & + iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& + nwork, wrkbl + integer(ilp) :: lwork_wgebrd_mn, lwork_wgebrd_mm, lwork_wgebrd_nn, lwork_wgelqf_mn, & + lwork_wgeqrf_mn, lwork_wungbr_p_mn, lwork_wungbr_p_nn, lwork_wungbr_q_mn, & + lwork_wungbr_q_mm, lwork_wunglq_mn, lwork_wunglq_nn, lwork_wungqr_mm, lwork_wungqr_mn, & + lwork_wunmbr_prc_mm, lwork_wunmbr_qln_mm, lwork_wunmbr_prc_mn, lwork_wunmbr_qln_mn, & + lwork_wunmbr_prc_nn, lwork_wunmbr_qln_nn + real(qp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dum(1) + complex(qp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + mnthr1 = int( minmn*17.0_qp / 9.0_qp,KIND=ilp) + mnthr2 = int( minmn*5.0_qp / 3.0_qp,KIND=ilp) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + minwrk = 1 + maxwrk = 1 + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_qbdsdc,KIND=qp) is + ! bdspac = 3*n*n + 4*n for singular values and vectors; + ! bdspac = 4*n for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_wgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_wgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_wgebrd( n, n, cdum(1), n, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_wgebrd_nn = int( cdum(1),KIND=ilp) + call stdlib_wgeqrf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_wgeqrf_mn = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'P', n, n, n, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'Q', m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_q_mn = int( cdum(1),KIND=ilp) + call stdlib_wungqr( m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungqr_mm = int( cdum(1),KIND=ilp) + call stdlib_wungqr( m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungqr_mn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_wunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_wunmbr_qln_mm = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'Q', 'L', 'N', m, n, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_wunmbr_qln_mn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'Q', 'L', 'N', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_wunmbr_qln_nn = int( cdum(1),KIND=ilp) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + maxwrk = n + lwork_wgeqrf_mn + maxwrk = max( maxwrk, 2*n + lwork_wgebrd_nn ) + minwrk = 3*n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_wgeqrf_mn + wrkbl = max( wrkbl, n + lwork_wungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_wgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_prc_nn ) + maxwrk = m*n + n*n + wrkbl + minwrk = 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_wgeqrf_mn + wrkbl = max( wrkbl, n + lwork_wungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_wgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_wgeqrf_mn + wrkbl = max( wrkbl, n + lwork_wungqr_mm ) + wrkbl = max( wrkbl, 2*n + lwork_wgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_wunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + max( 3*n, n + m ) + end if + else if( m>=mnthr2 ) then + ! path 5 (m >> n, but not as much as mnthr1) + maxwrk = 2*n + lwork_wgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 5o (m >> n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_wungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_wungbr_q_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_wungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_wungbr_q_mn ) + else if( wntqa ) then + ! path 5a (m >> n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_wungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_wungbr_q_mm ) + end if + else + ! path 6 (m >= n, but not much larger) + maxwrk = 2*n + lwork_wgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 6o (m >= n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_prc_nn ) + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_qln_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_qln_mn ) + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_prc_nn ) + else if( wntqa ) then + ! path 6a (m >= n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*n + lwork_wunmbr_prc_nn ) + end if + end if + else if( minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_qbdsdc,KIND=qp) is + ! bdspac = 3*m*m + 4*m for singular values and vectors; + ! bdspac = 4*m for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_wgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_wgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_wgebrd( m, m, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_wgebrd_mm = int( cdum(1),KIND=ilp) + call stdlib_wgelqf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_wgelqf_mn = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'P', m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_p_mn = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'P', n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_wunglq( m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_wunglq_mn = int( cdum(1),KIND=ilp) + call stdlib_wunglq( n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_wunglq_nn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'P', 'R', 'C', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_wunmbr_prc_mm = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'P', 'R', 'C', m, n, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_wunmbr_prc_mn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, m, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_wunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_wunmbr_qln_mm = int( cdum(1),KIND=ilp) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + maxwrk = m + lwork_wgelqf_mn + maxwrk = max( maxwrk, 2*m + lwork_wgebrd_mm ) + minwrk = 3*m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_wgelqf_mn + wrkbl = max( wrkbl, m + lwork_wunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_wgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_prc_mm ) + maxwrk = m*n + m*m + wrkbl + minwrk = 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_wgelqf_mn + wrkbl = max( wrkbl, m + lwork_wunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_wgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_wgelqf_mn + wrkbl = max( wrkbl, m + lwork_wunglq_nn ) + wrkbl = max( wrkbl, 2*m + lwork_wgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_wunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + max( 3*m, m + n ) + end if + else if( n>=mnthr2 ) then + ! path 5t (n >> m, but not as much as mnthr1) + maxwrk = 2*m + lwork_wgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 5to (n >> m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_wungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wungbr_p_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_wungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wungbr_p_mn ) + else if( wntqa ) then + ! path 5ta (n >> m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_wungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wungbr_p_nn ) + end if + else + ! path 6t (n > m, but not much larger) + maxwrk = 2*m + lwork_wgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 6to (n > m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_prc_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_prc_mn ) + else if( wntqa ) then + ! path 6ta (n > m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_wunmbr_prc_nn ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + end if + if( info==0 ) then + work( 1 ) = stdlib_qroundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n [tau] + n [work] + ! cworkspace: prefer n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out below r + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + n + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + ir = iu + ldwrku*n + if( lwork >= m*n + n*n + 3*n ) then + ! work(ir) is m by n + ldwrkr = m + else + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work( ir ), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of r in work(iru) and computing right singular vectors + ! of r in work(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_wunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by the right singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! cworkspace: need n*n [u] + n*n [r] + ! cworkspace: prefer n*n [u] + m*n [r] + ! rworkspace: need 0 + do i = 1, m, ldwrkr + chunk = min( m-i+1, ldwrkr ) + call stdlib_wgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( iu ), & + ldwrku, czero,work( ir ), ldwrkr ) + call stdlib_wlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! cworkspace: need n*n [r] + ! rworkspace: need 0 + call stdlib_wlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & + u, ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! cworkspace: need n*n [u] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! cworkspace: need n*n [u] + n [tau] + m [work] + ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ! produce r in a, zeroing out below it + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_wunmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! cworkspace: need n*n [u] + ! rworkspace: need 0 + call stdlib_wgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & + a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else if( m>=mnthr2 ) then + ! mnthr2 <= m < mnthr1 + ! path 5 (m >> n, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_wungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >> n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1,dum,1,dum, idum, & + rwork( nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + ! path 5o (m >> n, jobz='o') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work(iu) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=qp) by p**h in vt, + ! storing the result in work(iu), copying to vt + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_wlarcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & + rwork( nrwork ) ) + call stdlib_wlacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) + ! multiply q in a by realmatrix rwork(iru,KIND=qp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_wlacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),n, work( iu ), & + ldwrku, rwork( nrwork ) ) + call stdlib_wlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=qp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_wlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=qp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_wlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! path 5a (m >> n, jobz='a') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=qp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_wlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=qp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_wlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr2 + ! path 6 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ! use stdlib_wunmbr to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6n (m >= n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work( iu ) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! path 6o (m >= n, jobz='o') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! path 6o-fast + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of a, copying + ! to a + ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + call stdlib_wlaset( 'F', m, n, czero, czero, work( iu ),ldwrku ) + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_wlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 6o-slow + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=qp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_wlacrm( chunk, n, a( i, 1 ), lda,rwork( iru ), n, work( iu )& + , ldwrku,rwork( nrwork ) ) + call stdlib_wlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_wlaset( 'F', m, n, czero, czero, u, ldu ) + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_qbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! set the right corner of u to identity matrix + call stdlib_wlaset( 'F', m, m, czero, czero, u, ldu ) + if( m>n ) then + call stdlib_wlaset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) + end if + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_wlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_wlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m [tau] + m [work] + ! cworkspace: prefer m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out above l + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + m + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_qbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ldwkvt = m + ! work(ivt) is m by m + il = ivt + ldwkvt*m + if( lwork >= m*n + m*m + 3*m ) then + ! work(il) m by n + ldwrkl = m + chunk = n + else + ! work(il) is m by chunk + ldwrkl = m + chunk = ( lwork - m*m - 3*m ) / m + end if + itau = il + ldwrkl*chunk + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_qbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix work(ivt) + ! overwrite work(ivt) by the right singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_wunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(il) by q + ! in a, storing result in work(il) and copying to a + ! cworkspace: need m*m [vt] + m*m [l] + ! cworkspace: prefer m*m [vt] + m*n [l] + ! rworkspace: need 0 + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_wgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1, i ), & + lda, czero, work( il ),ldwrkl ) + call stdlib_wlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_qbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! copy vt to work(il), multiply right singular vectors of l + ! in work(il) by q in a, storing result in vt + ! cworkspace: need m*m [l] + ! rworkspace: need 0 + call stdlib_wlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! cworkspace: need m*m [vt] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! cworkspace: need m*m [vt] + m [tau] + n [work] + ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& + nwork+1, ierr ) + ! produce l in a, zeroing out above it + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_qbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_wunmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! cworkspace: need m*m [vt] + ! rworkspace: need 0 + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else if( n>=mnthr2 ) then + ! mnthr2 <= n < mnthr1 + ! path 5t (n >> m, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_wungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n >> m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_qbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + ivt = nwork + ! path 5to (n >> m, jobz='o') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& + nwork+1, ierr ) + ldwkvt = m + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + nwork = ivt + ldwkvt*n + chunk = n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(irvt,KIND=qp) + ! storing the result in work(ivt), copying to u + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_wlacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & + nrwork ) ) + call stdlib_wlacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) + ! multiply rwork(irvt) by p**h in a, storing the + ! result in work(ivt), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_wlarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,work( ivt ), & + ldwkvt, rwork( nrwork ) ) + call stdlib_wlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=qp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_wlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=qp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_wlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! path 5ta (n >> m, jobz='a') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=qp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_wlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=qp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_wlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr2 + ! path 6t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ! use stdlib_wunmbr to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6tn (n > m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_qbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 6to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + call stdlib_wlaset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*m ) then + ! path 6to-fast + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of a, + ! copying to a + ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + + call stdlib_wunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_wlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 6to-slow + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need 0 + call stdlib_wungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=qp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_wlarcm( m, blk, rwork( irvt ), m, a( 1, i ),lda, work( ivt )& + , ldwkvt,rwork( nrwork ) ) + call stdlib_wlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_wlaset( 'F', m, n, czero, czero, vt, ldvt ) + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_qbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=qp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_wlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_wunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! set all of vt to identity matrix + call stdlib_wlaset( 'F', n, n, czero, cone, vt, ldvt ) + ! copy realmatrix rwork(irvt,KIND=qp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_wlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_wunmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm ZGESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_wgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda ZGESVD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**H, not V. + + subroutine stdlib_wgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), s(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_wgeqrf, lwork_wungqr_n, lwork_wungqr_m, lwork_wgebrd, & + lwork_wungbr_p, lwork_wungbr_q, lwork_wgelqf, lwork_wunglq_n, lwork_wunglq_m + real(qp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(qp) :: dum(1) + complex(qp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! space needed for stdlib_wbdsqr is bdspac = 5*n + mnthr = stdlib_ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_wgeqrf + call stdlib_wgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_wgeqrf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wungqr + call stdlib_wungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_wungqr_n = int( cdum(1),KIND=ilp) + call stdlib_wungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_wungqr_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wgebrd + call stdlib_wgebrd( n, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_wgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wungbr + call stdlib_wungbr( 'P', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_p = int( cdum(1),KIND=ilp) + call stdlib_wungbr( 'Q', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_q = int( cdum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_wgeqrf + maxwrk = max( maxwrk, 2*n+lwork_wgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2*n+lwork_wungbr_p ) + minwrk = 3*n + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_p ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_wgeqrf + wrkbl = max( wrkbl, n+lwork_wungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_wungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_wgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_wgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*n + lwork_wgebrd + if( wntus .or. wntuo ) then + call stdlib_wungbr( 'Q', m, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_wungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_wungbr_q ) + end if + if( wntua ) then + call stdlib_wungbr( 'Q', m, m, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_wungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_wungbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 2*n+lwork_wungbr_p ) + end if + minwrk = 2*n + m + end if + else if( minmn>0 ) then + ! space needed for stdlib_wbdsqr is bdspac = 5*m + mnthr = stdlib_ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_wgelqf + call stdlib_wgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_wgelqf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wunglq + call stdlib_wunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,ierr ) + lwork_wunglq_n = int( cdum(1),KIND=ilp) + call stdlib_wunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_wunglq_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wgebrd + call stdlib_wgebrd( m, m, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_wgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wungbr p + call stdlib_wungbr( 'P', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_p = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_wungbr q + call stdlib_wungbr( 'Q', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_q = int( cdum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_wgelqf + maxwrk = max( maxwrk, 2*m+lwork_wgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2*m+lwork_wungbr_q ) + minwrk = 3*m + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_q ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_wgelqf + wrkbl = max( wrkbl, m+lwork_wunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_wgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_wungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_wgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_wgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*m + lwork_wgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_wungbr p + call stdlib_wungbr( 'P', m, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_wungbr_p ) + end if + if( wntva ) then + call stdlib_wungbr( 'P', n, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_wungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_wungbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 2*m+lwork_wungbr_q ) + end if + minwrk = 2*m + n + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: need 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + irwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_wlacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: need 0) + call stdlib_wungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_wgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_wlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: n) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (cworkspace: need 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_wlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_wgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_wlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_wlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: n) + call stdlib_wgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, cdum,& + 1, rwork( irwork ),info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + ! copy right singular vectors of r to a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_wlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_wlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: need 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_wlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_wgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_wungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_wlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) + ! (rworkspace: 0) + call stdlib_wlacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_wungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_wungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + irwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_wlacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_wgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_wlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( ir ), ldwrkr, u, & + ldu, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n)) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_wgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_wlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,u, ldu, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_wlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_wlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_wlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_wgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_wlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_wgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_wunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wlaset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_wgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_wunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: m) + call stdlib_wgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_wungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) + ! (rworkspace: 0) + call stdlib_wlacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_wungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_wungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_wbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_qlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm ZCGESVDQ computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_wgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lcwork + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) + real(qp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: ierr, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_wgelqf, lwrk_wgesvd, lwrk_wgesvd2, lwrk_wgeqp3, & + lwrk_wgeqrf, lwrk_wunmlq, lwrk_wunmqr, lwrk_wunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(qp) :: big, epsln, rtmp, sconda, sfmin + complex(qp) :: ctmp + ! Local Arrays + complex(qp) :: cdummy(1) + real(qp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,real,sqrt + ! Executable Statements + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + iminwrk = max( 1, n + m - 1 ) + rminwrk = max( 2, m, 5*n ) + else + iminwrk = max( 1, n ) + rminwrk = max( 2, 5*n ) + end if + lquery = (liwork == -1 .or. lcwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=qp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_wlascl('G',0,0,sqrt(real(m,KIND=qp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_wlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_wlange( 'M', m, n, a, lda, rwork ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = -8 + call stdlib_xerbla( 'ZGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=qp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_wlascl('G',0,0, sqrt(real(m,KIND=qp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_wgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) + + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_qlamch('E') + sfmin = stdlib_qlamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=qp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_qlamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_wlacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_qznrm2( p, v(1,p), 1 ) + call stdlib_wdscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_wpocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) + + else + call stdlib_wpocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) + + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**h = [a](1:nr,1:n)**h + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + a(q,p) = conjg(a(p,q)) + if ( q <= nr ) a(p,q) = czero + end do + end do + call stdlib_wgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_wlaset( 'L', nr-1,nr-1, czero,czero, a(2,1), lda ) + + call stdlib_wgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_wgesvd to r**h + ! .. copy r**h into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_wlaset( 'U', nr-1,nr-1, czero,czero, u(1,2), ldu ) + + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_wgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + else + ! Apply Stdlib_Zgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_wlacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_wlaset( 'L', nr-1, nr-1, czero, czero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_wgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_wlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_wlaset( 'A',nr,n1-nr,czero,czero,u(1,nr+1), ldu ) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_wgesvd to r**h + ! .. copy r**h into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_wlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_wgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_wlapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_wlaset('G', n, n-nr, czero, czero, v(1,nr+1), ldv) + call stdlib_wgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Zgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_wlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_wlaset( 'L', nr-1, nr-1, czero, czero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_wgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_wlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_wlaset('G', n-nr, n, czero,czero, v(nr+1,1), ldv) + call stdlib_wgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the adjoint of the matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_wgesvd to r**h [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_wlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite [v], the nr right + ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate + ! transposed + call stdlib_wgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + ! Assemble V + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_wlapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_wlaset('U',nr-1,nr-1, czero,czero, v(1,2),ldv) + + call stdlib_wlaset('A',n,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_wgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + u(p,p) = conjg(u(p,p)) + do q = p + 1, n + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_wlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_wlaset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**h into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_wlaset('U',nr-1,nr-1,czero,czero,u(1,nr+2),ldu) + + call stdlib_wgeqrf( n, nr, u(1,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = conjg(u(p,nr+q)) + end do + end do + call stdlib_wlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + call stdlib_wgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& + 1),lcwork-n-nr,rwork, info ) + call stdlib_wlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_wlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_wlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_wunmqr('R','C', n, n, nr, u(1,nr+1), ldu,cwork(n+1),v,ldv,& + cwork(n+nr+1),lcwork-n-nr,ierr) + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_wgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_wlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_wlaset( 'L', nr-1,nr-1, czero,czero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_wgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_wlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_wlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_wlaset('L', nr-1,nr-1, czero,czero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_wlaset('A', n-nr,n, czero,czero, v(nr+1,1),ldv) + call stdlib_wgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the adjoint of the matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_wlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_wlaset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_wlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_wlaset('L',nr-1,nr-1,czero,czero,u(nr+2,1),ldu) + + call stdlib_wgelqf( nr, n, u(nr+1,1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + call stdlib_wlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_wlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + + call stdlib_wgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& + 1), lcwork-n-nr, rwork, info ) + call stdlib_wlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_wlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_wlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_wunmlq('R','N',n,n,nr,u(nr+1,1),ldu,cwork(n+1),v, ldv, cwork(n+& + nr+1),lcwork-n-nr,ierr) + call stdlib_wlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_wlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_wlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_wlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + end if + end if + ! .. end of the "r**h or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_wunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_wlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_qlaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_qlascl( 'G',0,0, one,sqrt(real(m,KIND=qp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_wgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_wgesvdq + + !> ZGESVJ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + pure subroutine stdlib_wgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & + rwork, lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) + real(qp), intent(inout) :: rwork(lrwork) + real(qp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + + ! Local Scalars + complex(qp) :: aapq, ompq + real(qp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & + theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Intrinsic Functions + intrinsic :: abs,max,min,conjg,real,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = rwork( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=qp) ) + else + ctol = real( m,KIND=qp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_dlamch() works properly on the target machine.) + epsln = stdlib_qlamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_qlamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + big = stdlib_qlamch( 'OVERFLOW' ) + ! big = one / sfmin + rootbig = one / rootsfmin + ! large = big / sqrt( real( m*n,KIND=qp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=qp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_wlaset( 'A', mvl, n, czero, cone, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl = one / sqrt( real( m,KIND=qp)*real( n,KIND=qp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_wlassq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_wlassq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_wlassq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl = one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_wlaset( 'G', m, n, czero, cone, a, lda ) + rwork( 1 ) = one + rwork( 2 ) = zero + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_wlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + rwork( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + rwork( 2 ) = one + else + rwork( 2 ) = zero + end if + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=qp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / (aapp*sqrt( real(n,KIND=qp)) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=qp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_qlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl = temp1*skl + if( skl/=one ) then + call stdlib_wlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl = one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + do q = 1, n + cwork( q ) = cone + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_wgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_wgejsv. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_wgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_wgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_wgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & + sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_wgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_wgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, cwork( n+1 ), lwork-n,ierr ) + call stdlib_wgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_wgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, cwork( n+1 ), lwork-n,ierr ) + call stdlib_wgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,ierr ) + + call stdlib_wgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + call stdlib_wgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& + , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_wswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_wswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = cwork(p) + cwork(p) = cwork(q) + cwork(q) = aapq + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! [!] caveat: + ! unfortunately, some blas implementations compute stdlib_qznrm2(m,a(1,p),1) + ! as sqrt(s=stdlib_zdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_qznrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_dcnrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_qznrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_wcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1), & + lda, ierr ) + aapq = stdlib_wdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_wcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_wdotc( m, a(1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_wcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one, m,1, cwork(n+1), & + lda,ierr ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_waxpy( m, -aapq, cwork(n+1), 1,a( 1, q ), 1 ) + + call stdlib_wlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qznrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_wlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_wcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_wdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_wcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_wdotc( m, a( 1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_wcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_waxpy( m, -aapq, cwork(n+1),1, a( 1, q ), 1 ) + + call stdlib_wlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_wcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_waxpy( m, -conjg(aapq),cwork(n+1), 1, a( 1, & + p ), 1 ) + call stdlib_wlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_wlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_qznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + call stdlib_wswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_wswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n4 + ! call stdlib_wdscal( m, one / sva( p ), a( 1, p ), 1 ) + call stdlib_wlascl( 'G',0,0, sva(p), one, m, 1, a(1,p), m, ierr ) + end do + end if + ! scale the product of jacobi rotations. + if( rsvec ) then + do p = 1, n + temp1 = one / stdlib_qznrm2( mvl, v( 1, p ), 1 ) + call stdlib_wdscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl ) ) ).or. ( ( skl( sfmin / skl ) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl = one + end if + rwork( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + rwork( 2 ) = real( n4,KIND=qp) + ! n4 is the number of computed nonzero singular values of a. + rwork( 3 ) = real( n2,KIND=qp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 ZGESVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(inout) :: c(*), r(*) + complex(qp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(qp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_wlantr( 'M', 'U', 'N', info, info, af, ldaf,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_wlange( 'M', n, info, a, lda, rwork ) /rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_wlange( norm, n, n, a, lda, rwork ) + rpvgrw = stdlib_wlantr( 'M', 'U', 'N', n, n, af, ldaf, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_wlange( 'M', n, n, a, lda, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_wgecon( norm, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_wgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZGETC2: computes an LU factorization, using complete pivoting, of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is a level 1 BLAS version of the algorithm. + + pure subroutine stdlib_wgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(qp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_wswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_wswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) ZGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_wgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_wscal( m-j, cone / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j ZGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_wgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_wgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_wgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_wlaswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_wlaswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_wtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_wgetrf + + !> ZGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_wgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(qp) :: sfmin + complex(qp) :: temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_wscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_wgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_wlaswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_wgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_wgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_wlaswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_wgetrf2 + + !> ZGETRI: computes the inverse of a matrix using the LU factorization + !> computed by ZGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_wgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'ZGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_wtrtri, then u is singular, + ! and the inverse is not computed. + call stdlib_wtrtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = czero + end do + ! compute current column of inv(a). + if( j ZGETRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by ZGETRF. + + pure subroutine stdlib_wgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZGETSLS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_wgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(qp) :: anrm, bignum, bnrm, smlnum, dum(1) + complex(qp) :: tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'C' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_wgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_wgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_wgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_wgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_wgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_wgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_wgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_wgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_wlaset( 'F', maxmn, nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_wlange( 'M', brow, nrhs, b, ldb, dum ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_wlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_wgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_wgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_wtrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_wtrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = czero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_wgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_wgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_wtrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_wgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_wgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_wtrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_wlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_wlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_wlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=qp) + return + end subroutine stdlib_wgetsls + + !> ZGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a complex M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in ZGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of ZGEQRT for more details on the format. + + pure subroutine stdlib_wgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,real,cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m ZGGBAK: forms the right or left eigenvectors of a complex generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> ZGGBAL. + + pure subroutine stdlib_wggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(in) :: lscale(*), rscale(*) + complex(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max,int + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv ZGGBAL: balances a pair of general complex matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_wggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(qp), intent(out) :: lscale(*), rscale(*), work(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sclfac = 1.0e+1_qp + + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(qp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log10,max,min,sign + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax ZGGES: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> ZGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_wgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr, work,lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_w) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkmin, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_wggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_wungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_wgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_whgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: none needed) + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_wtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_wggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_wggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_wlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_wlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_wlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_wlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_wgges + + !> ZGGES3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> ZGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_wgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr,work, lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_w) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_wggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_wungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_wgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk, ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_wlaqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_wtgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_wggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_wggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_wlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_wlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_wlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_wlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = cmplx( lwkopt,KIND=qp) + return + end subroutine stdlib_wgges3 + + !> ZGGESX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the complex Schur form (S,T), + !> and, optionally, the left and/or right matrices of Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if T is + !> upper triangular with non-negative diagonal and S is upper + !> triangular. + + subroutine stdlib_wggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, alpha,& + beta, vsl, ldvsl, vsr,ldvsr, rconde, rcondv, work, lwork, rwork,iwork, liwork, bwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rconde(2), rcondv(2), rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_w) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantsb, wantse, & + wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, iright, irows, & + irwrk, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, smlnum + ! Local Arrays + real(qp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = 2*n + maxwrk = n*(1 + stdlib_ilaenv( 1, 'ZGEQRF', ' ', n, 1, n, 0 ) ) + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'ZUNMQR', ' ', n, 1, n, -1 ) ) ) + + if( ilvsl ) then + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'ZUNGQR', ' ', n, 1, n, -1 ) ) & + ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 2 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_wggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_wungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_wgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_whgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 40 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + ! (complex workspace: if ijob >= 1, need max(1, 2*sdim*(n-sdim)) + ! otherwise, need 1 ) + call stdlib_wtgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pl, pr,dif, work( iwrk ), lwork-iwrk+1, iwork, liwork,ierr & + ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-21 ) then + ! not enough complex workspace + info = -21 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_wggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_wggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_wlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_wlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_wlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_wlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 40 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_wggesx + + !> ZGGEV: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_wggev( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkmin, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(qp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=qp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_wggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (complex workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_wungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_wgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_wgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_whgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + ! (real workspace: need 2*n) + ! (complex workspace: need 2*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_wtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_wggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZGGEV3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_wggev3( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkopt + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(qp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=qp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_wggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_wungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_wgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk, ierr ) + else + call stdlib_wgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_wlaqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_wtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_wggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZGGEVX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B) the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> Optionally, it also computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_wggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, ilo, ihi,lscale, rscale, abnrm, bbnrm, rconde, rcondv,work, lwork, rwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(qp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: lscale(*), rconde(*), rcondv(*), rscale(*), rwork(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, wantsb, wantse, wantsn, & + wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk + real(qp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(qp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=qp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( noscl .or. stdlib_lsame( balanc,'S' ) .or.stdlib_lsame( balanc, 'B' ) ) ) & + then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_wlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_wlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_wlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (real workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_wggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,rwork, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_wlange( '1', n, n, a, lda, rwork( 1 ) ) + if( ilascl ) then + rwork( 1 ) = abnrm + call stdlib_qlascl( 'G', 0, 0, anrmto, anrm, 1, 1, rwork( 1 ), 1,ierr ) + abnrm = rwork( 1 ) + end if + bbnrm = stdlib_wlange( '1', n, n, b, ldb, rwork( 1 ) ) + if( ilbscl ) then + rwork( 1 ) = bbnrm + call stdlib_qlascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, rwork( 1 ), 1,ierr ) + bbnrm = rwork( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_wgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to a + ! (complex workspace: need n, prefer n*nb) + call stdlib_wunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_wlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_wlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_wungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_wlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_wgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_wgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_whgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 90 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! stdlib_wtgevc: (complex workspace: need 2*n ) + ! (real workspace: need 2*n ) + ! stdlib_wtgsna: (complex workspace: need 2*n*n if sense='v' or 'b') + ! (integer workspace: need n+2 ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_wtgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work( iwrk ), rwork,ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_wtgevc) and estimate condition + ! numbers (stdlib_wtgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to + ! re-calculate eigenvectors and estimate the condition numbers + ! one at a time. + do i = 1, n + do j = 1, n + bwork( j ) = .false. + end do + bwork( i ) = .true. + iwrk = n + 1 + iwrk1 = iwrk + n + if( wantse .or. wantsb ) then + call stdlib_wtgevc( 'B', 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, 1, m,work( iwrk1 ), rwork, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + call stdlib_wtgsna( sense, 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, rconde( i ),rcondv( i ), 1, m, work( iwrk1 ),lwork-iwrk1+1, iwork, & + ierr ) + end do + end if + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_wggbak( balanc, 'L', n, ilo, ihi, lscale, rscale, n, vl,ldvl, ierr ) + + loop_50: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_wggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + complex(qp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_wtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_wcopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = czero + end do + ! update d1 = d1 - t12*y2 + call stdlib_wgemv( 'NO TRANSPOSE', m, n-m, -cone, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1,& + cone, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_wtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_wcopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**h *y + call stdlib_wunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), & + ldb, work( m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_wggglm + + !> ZGGHD3: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then ZGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of CGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_wgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(qp) :: c + complex(qp) :: c1, c2, ctemp, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = cmplx( lwkopt,KIND=qp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_wlaset( 'LOWER', n-1, n-1, czero, czero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = cone + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_wlaset( 'ALL', nblst, nblst, czero, cone, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_wlaset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_wlartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = cmplx( c,KIND=qp) + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + ctemp = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = ctemp*temp - conjg( s )*b( i-1, jj ) + b( i-1, jj ) = s*temp + ctemp*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + c = real( a( j+1+i, j ),KIND=qp) + call stdlib_wrot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, c,-& + conjg( b( j+1+i, j ) ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated unitary + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_wgemv( 'CONJUGATE', nblst, len, cone, work,nblst, a( jrow, j+1 & + ), 1, czero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_wtrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_wgemv( 'CONJUGATE', len, nblst-len, cone,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, cone,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated unitary + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_wtrmv( 'UPPER', 'CONJUGATE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_wtrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_wgemv( 'CONJUGATE', nnb, len, cone,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,cone, work( pw ), 1 ) + call stdlib_wgemv( 'CONJUGATE', len, nnb, cone,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, cone,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated unitary matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_wgemm( 'CONJUGATE', 'NO TRANSPOSE', nblst,cola, nblst, cone, work, & + nblst,a( j, jcol+nnb ), lda, czero, work( pw ),nblst ) + call stdlib_wlacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_wunm22( 'LEFT', 'CONJUGATE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_wgemm( 'CONJUGATE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, cone, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, czero, work( pw ),2*nnb ) + + call stdlib_wlacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated unitary matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, q( & + topq, j ), ldq,work, nblst, czero, work( pw ), nh ) + call stdlib_wlacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_wunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, q( topq, j ), ldq,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_wlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_wlaset( 'ALL', nblst, nblst, czero, cone, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_wlaset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_wlaset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,a( jcol + 2, & + jcol ), lda ) + call stdlib_wlaset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated unitary matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, a( & + 1, j ), lda,work, nblst, czero, work( pw ), top ) + call stdlib_wlacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_wunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, a( 1, j ), lda,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_wlacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, b( & + 1, j ), ldb,work, nblst, czero, work( pw ), top ) + call stdlib_wlacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_wunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, b( 1, j ), ldb,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_wlacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated unitary matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, z( & + topq, j ), ldz,work, nblst, czero, work( pw ), nh ) + call stdlib_wlacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_wunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, z( topq, j ), ldz,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_wlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol ZGGHRD: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then ZGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_wgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(qp) :: c + complex(qp) :: ctemp, s + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi ZGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_wgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + complex(qp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_wtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_wcopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_wgemv( 'NO TRANSPOSE', n-p, p, -cone, a( 1, n-p+1 ), lda,d, 1, cone, c, & + 1 ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_wtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_wcopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_wgemv( 'NO TRANSPOSE', nr, n-m, -cone, a( n-p+1, m+1 ),lda, d(& + nr+1 ), 1, cone, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_wtrmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_waxpy( nr, -cone, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**h*x + call stdlib_wunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', n, 1, p, b, ldb,work( 1 ), x, n, & + work( p+mn+1 ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_wgglse + + !> ZGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, + !> and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**H * (inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of matrix Z. + + pure subroutine stdlib_wggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda ZGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**H + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of the matrix Z. + + pure subroutine stdlib_wggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZGSVJ0: is called from ZGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_wgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(qp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(qp), intent(out) :: work(lwork) + real(qp), intent(inout) :: sva(n) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: aapq, ompq + real(qp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,max,conjg,real,min,sign,sqrt + ! from lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_qznrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_qznrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_qznrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, work, lda, & + ierr ) + aapq = stdlib_wdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_wcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_wdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one, m,1, work, lda,& + ierr ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_waxpy( m, -aapq, work, 1,a( 1, q ), 1 ) + call stdlib_wlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qznrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_wlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_wdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_wcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_wdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_waxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_wlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_wcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_waxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_wlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_wlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_qznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_wswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_wswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_wgsvj0 + + !> ZGSVJ1: is called from ZGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_wgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(qp), intent(out) :: work(lwork) + real(qp), intent(inout) :: sva(n) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: aapq, ompq + real(qp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real,min,sign,sqrt + ! From Lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_wdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_wdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_wcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_wdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_wrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_wrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_wcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_waxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_wlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_wcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_wlascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_wlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_waxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_wlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_qznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_wlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_qznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_qznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_wlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_iqamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_wswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_wswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_wgsvj1 + + !> ZGTCON: estimates the reciprocal of the condition number of a complex + !> tridiagonal matrix A using the LU factorization as computed by + !> ZGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: cmplx + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm ZGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_wgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wgttrs( trans, n, 1, dlf, df, duf, du2, ipiv, work, n,info ) + call stdlib_waxpy( n, cmplx( one,KIND=qp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wgttrs( transt, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wgttrs( transn, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_wgtrfs + + !> ZGTSV: solves the equation + !> A*X = B, + !> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T *X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_wgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k + complex(qp) :: mult, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=cabs1( dl( k ) ) ) then + ! no row interchange required + mult = dl( k ) / d( k ) + d( k+1 ) = d( k+1 ) - mult*du( k ) + do j = 1, nrhs + b( k+1, j ) = b( k+1, j ) - mult*b( k, j ) + end do + if( k<( n-1 ) )dl( k ) = czero + else + ! interchange rows k and k+1 + mult = d( k ) / dl( k ) + d( k ) = dl( k ) + temp = d( k+1 ) + d( k+1 ) = du( k ) - mult*temp + if( k<( n-1 ) ) then + dl( k ) = du( k+1 ) + du( k+1 ) = -mult*dl( k ) + end if + du( k ) = temp + do j = 1, nrhs + temp = b( k, j ) + b( k, j ) = b( k+1, j ) + b( k+1, j ) = temp - mult*b( k+1, j ) + end do + end if + end do loop_30 + if( d( n )==czero ) then + info = n + return + end if + ! back solve with the matrix u from the factorization. + do j = 1, nrhs + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do k = n - 2, 1, -1 + b( k, j ) = ( b( k, j )-du( k )*b( k+1, j )-dl( k )*b( k+2, j ) ) / d( k ) + + end do + end do + return + end subroutine stdlib_wgtsv + + !> ZGTSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_wgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + complex(qp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_wcopy( n-1, dl, 1, dlf, 1 ) + call stdlib_wcopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_wgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_wlangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_wgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_wgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZGTTRF: computes an LU factorization of a complex tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_wgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: d(*), dl(*), du(*) + complex(qp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: fact, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'ZGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( cabs1( d( i ) )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_wgttrf + + !> ZGTTRS: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by ZGTTRF. + + pure subroutine stdlib_wgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_wgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_wgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_wgttrs + + !> ZGTTS2: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by ZGTTRF. + + pure subroutine stdlib_wgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + complex(qp) :: temp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + if( ipiv( i )==i ) then + b( i+1, j ) = b( i+1, j ) - dl( i )*b( i, j ) + else + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - dl( i )*b( i, j ) + end if + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else if( itrans==1 ) then + ! solve a**t * x = b. + if( nrhs<=1 ) then + j = 1 + 70 continue + ! solve u**t * x = b. + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + else + ! solve a**h * x = b. + if( nrhs<=1 ) then + j = 1 + 130 continue + ! solve u**h * x = b. + b( 1, j ) = b( 1, j ) / conjg( d( 1 ) ) + if( n>1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) ) /conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )*b( & + i-2, j ) ) /conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) )/ conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )& + *b( i-2, j ) ) / conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_wgtts2 + + !> ZHB2ST_KERNELS: is an internal routine used by the ZHETRD_HB2ST + !> subroutine. + + pure subroutine stdlib_whb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, lda, & + v, tau, ldvt, work) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype, st, ed, sweep, n, nb, ib, lda, ldvt + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: v(*), tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j1, j2, lm, ln, vpos, taupos, dpos, ofdpos, ajeter + complex(qp) :: ctmp + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ajeter = ib + ldvt + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + dpos = 2 * nb + 1 + ofdpos = 2 * nb + else + dpos = 1 + ofdpos = 2 + endif + ! upper case + if( upper ) then + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = conjg( a( ofdpos-i, st+i ) ) + a( ofdpos-i, st+i ) = czero + end do + ctmp = conjg( a( ofdpos, st ) ) + call stdlib_wlarfg( lm, ctmp, v( vpos+1 ), 1,tau( taupos ) ) + a( ofdpos, st ) = ctmp + lm = ed - st + 1 + call stdlib_wlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_wlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_wlarfx( 'LEFT', ln, lm, v( vpos ),conjg( tau( taupos ) ),a( & + dpos-nb, j1 ), lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) =conjg( a( dpos-nb-i, j1+i ) ) + a( dpos-nb-i, j1+i ) = czero + end do + ctmp = conjg( a( dpos-nb, j1 ) ) + call stdlib_wlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) ) + a( dpos-nb, j1 ) = ctmp + call stdlib_wlarfx( 'RIGHT', ln-1, lm, v( vpos ),tau( taupos ),a( dpos-nb+& + 1, j1 ), lda-1, work) + endif + endif + ! lower case + else + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( ofdpos+i, st-1 ) + a( ofdpos+i, st-1 ) = czero + end do + call stdlib_wlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,tau( taupos ) ) + + lm = ed - st + 1 + call stdlib_wlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_wlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_wlarfx( 'RIGHT', lm, ln, v( vpos ),tau( taupos ), a( dpos+nb, & + st ),lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( dpos+nb+i, st ) + a( dpos+nb+i, st ) = czero + end do + call stdlib_wlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,tau( taupos ) ) + + call stdlib_wlarfx( 'LEFT', lm, ln-1, v( vpos ),conjg( tau( taupos ) ),a( & + dpos+nb-1, st+1 ), lda-1, work) + endif + endif + endif + return + end subroutine stdlib_whb2st_kernels + + !> ZHBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. + + subroutine stdlib_whbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, iscale + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_wlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_wlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_whbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + call stdlib_whbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, call stdlib_wsteqr. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + indrwk = inde + n + call stdlib_wsteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_whbev + + !> ZHBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_whbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, rwork, lrwork, & + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indwk2, indwrk, iscale, liwmin, llrwk, llwk2, & + lrwmin, lwmin + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 .or. lrwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else + if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_wlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_wlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_whbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + indwk2 = 1 + n*n + llwk2 = lwork - indwk2 + 1 + llrwk = lrwork - indwrk + 1 + call stdlib_whbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, call stdlib_wstedc. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + call stdlib_wstedc( 'I', n, w, rwork( inde ), work, n, work( indwk2 ),llwk2, rwork( & + indwrk ), llrwk, iwork, liwork,info ) + call stdlib_wgemm( 'N', 'N', n, n, n, cone, z, ldz, work, n, czero,work( indwk2 ), & + n ) + call stdlib_wlacpy( 'A', n, n, work( indwk2 ), n, z, ldz ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_whbevd + + !> ZHBEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian band matrix A. Eigenvalues and eigenvectors + !> can be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_whbevx( jobz, range, uplo, n, kd, ab, ldab, q, ldq, vl,vu, il, iu, abstol, & + m, w, z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, kd, ldab, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*) + complex(qp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indwrk, iscale, itmp1, j, jj, nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + complex(qp) :: ctmp1 + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=tmp1 ) )m = 0 + end if + if( m==1 ) then + w( 1 ) = real( ctmp1,KIND=qp) + if( wantz )z( 1, 1 ) = cone + end if + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_wlanhb( 'M', uplo, n, kd, ab, ldab, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_wlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_wlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_whbtrd to reduce hermitian band matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indwrk = 1 + call stdlib_whbtrd( jobz, uplo, n, kd, ab, ldab, rwork( indd ),rwork( inde ), q, ldq, & + work( indwrk ), iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_qsterf or stdlib_wsteqr. if this fails for some + ! eigenvalue, then try stdlib_qstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_qcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_qsterf( n, w, rwork( indee ), info ) + else + call stdlib_wlacpy( 'A', n, n, q, ldq, z, ldz ) + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_wsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_wstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_wstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_wstein. + do j = 1, m + call stdlib_wcopy( n, z( 1, j ), 1, work( 1 ), 1 ) + call stdlib_wgemv( 'N', n, n, cone, q, ldq, work, 1, czero,z( 1, j ), 1 ) + end do + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHBGST: reduces a complex Hermitian-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**H*S by ZPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**H*A*X, where + !> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the + !> bandwidth of A. + + pure subroutine stdlib_whbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldx, n + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: ab(ldab,*) + complex(qp), intent(in) :: bb(ldbb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: update, upper, wantx + integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, & + nrt, nx + real(qp) :: bii + complex(qp) :: ra, ra1, t + ! Intrinsic Functions + intrinsic :: real,conjg,max,min + ! Executable Statements + ! test the input parameters + wantx = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + ka1 = ka + 1 + kb1 = kb + 1 + info = 0 + if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldabn-1 )go to 480 + end if + if( upper ) then + ! transform a, working with the upper triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( kb1, i ),KIND=qp) + ab( ka1, i ) = ( real( ab( ka1, i ),KIND=qp) / bii ) / bii + do j = i + 1, i1 + ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, & + i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),& + KIND=qp)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,& + i ) + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j ) + + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_wdscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_wgerc( n-m, kbt, -cone, x( m+1, i ), 1,bb( kb1-kbt, i )& + , 1, x( m+1, i-kbt ),ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+ka1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_130: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i,i-k+ka+1) + call stdlib_wlartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-& + m ), ra ) + ! create nonzero element a(i-k,i-k+ka+1) outside the + ! band and store it in work(i-k) + t = -bb( kb1-k, i )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1, i-k+ka & + ) + ab( 1, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1, i-k+ka ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j-m )*ab( 1, j+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_wlargv( nrt, ab( 1, j2t ), inca, work( j2t-m ), ka1,rwork(& + j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + call stdlib_wlacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_wrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), & + conjg( work( j-m ) ) ) + end do + end if + end do loop_130 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kb1-kbt, i )*ra1 + end if + end if + loop_170: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), & + inca, rwork( j2-ka ),work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j )*ab( 1, j+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_wlargv( nr, ab( 1, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 ) + + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + call stdlib_wlacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_wrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), conjg( & + work( j ) ) ) + end do + end if + end do loop_210 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=qp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=qp) / bii ) / bii + do j = i + 1, i1 + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - & + conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1, i ),KIND=qp)*bb( i-j+& + 1, j )*conjg( bb( i-k+1,k ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_wdscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_wgeru( n-m, kbt, -cone, x( m+1, i ), 1,bb( kbt+1, i-& + kbt ), ldbb-1,x( m+1, i-kbt ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_360: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i-k+ka+1,i) + call stdlib_wlartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )& + , ra ) + ! create nonzero element a(i-k+ka+1,i-k) outside the + ! band and store it in work(i-k) + t = -bb( k+1, i-k )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k ) + + ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_wlargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, & + rwork( j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2-m ), work( j2-m ), ka1 ) + call stdlib_wlacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_wrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), work(& + j-m ) ) + end do + end if + end do loop_360 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1 + end if + end if + loop_400: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-& + ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_wlargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), & + ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2 ), work( j2 ), ka1 ) + call stdlib_wlacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_wrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), work( & + j ) ) + end do + end if + end do loop_440 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + end if + go to 10 + 480 continue + ! **************************** phase 2 ***************************** + ! the logical structure of this phase is: + ! update = .true. + ! do i = 1, m + ! use s(i) to update a and create a new bulge + ! apply rotations to push all bulges ka positions upward + ! end do + ! update = .false. + ! do i = m - ka - 1, 2, -1 + ! apply rotations to push all bulges ka positions upward + ! end do + ! to avoid duplicating code, the two loops are merged. + update = .true. + i = 0 + 490 continue + if( update ) then + i = i + 1 + kbt = min( kb, m-i ) + i0 = i + 1 + i1 = max( 1, i-ka ) + i2 = i + kbt - ka1 + if( i>m ) then + update = .false. + i = i - 1 + i0 = m + 1 + if( ka==0 )return + go to 490 + end if + else + i = i - ka + if( i<2 )return + end if + if( i0 )call stdlib_wgeru( nx, kbt, -cone, x( 1, i ), 1,bb( kb, i+1 ), & + ldbb-1, x( 1, i+1 ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+ka1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_610: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_wlargv( nrt, ab( 1, j1+ka ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( j1 ),work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( j1 ), work( j1 ),ka1 ) + call stdlib_wlacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_wrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), work( j ) ) + + end do + end if + end do loop_610 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1 + end if + end if + loop_650: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),& + inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j-1,j+ka) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( 1, j+ka-1 ) + ab( 1, j+ka-1 ) = rwork( m-kb+j )*ab( 1, j+ka-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_650 + loop_690: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_wlargv( nr, ab( 1, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_wlacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_wrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_690 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=qp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=qp) / bii ) / bii + do j = i1, i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do j = i + 1, min( n, i+ka ) + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do k = i + 1, i + kbt + do j = k, i + kbt + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - & + conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1, i ),KIND=qp)*bb( j-i+& + 1, i )*conjg( bb( k-i+1,i ) ) + end do + do j = i + kbt + 1, min( n, i+ka ) + ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + + end do + end do + do j = i1, i + do k = i + 1, min( j+ka, i+kbt ) + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_wdscal( nx, one / bii, x( 1, i ), 1 ) + if( kbt>0 )call stdlib_wgerc( nx, kbt, -cone, x( 1, i ), 1, bb( 2, i ),1, x( & + 1, i+1 ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_840: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_wlargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + j1 ), work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + j1 ),work( j1 ), ka1 ) + call stdlib_wlacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_wrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), conjg( work(& + j ) ) ) + end do + end if + end do loop_840 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1 + end if + end if + loop_880: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, & + j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j+ka,j-1) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 ) + ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_880 + loop_920: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_wlargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+& + j1 ), ka1 ) + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_wlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + m-kb+j1 ), work( m-kb+j1 ),ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_wlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_wlacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_wrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), conjg( & + work( m-kb+j ) ) ) + end do + end if + end do loop_920 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_wlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + end if + go to 490 + end subroutine stdlib_whbgst + + !> ZHBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. + + pure subroutine stdlib_whbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwrk + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab ZHBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_whbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, rwork, lrwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwk2, indwrk, liwmin, llrwk, llwk2, lrwmin, & + lwmin + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1+n + lrwmin = 1+n + liwmin = 1 + else if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab ZHBGVX: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. Eigenvalues and + !> eigenvectors can be selected by specifying either all eigenvalues, + !> a range of values or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_whbgvx( jobz, range, uplo, n, ka, kb, ab, ldab, bb,ldbb, q, ldq, vl, & + vu, il, iu, abstol, m, w, z,ldz, work, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ka, kb, ldab, ldbb, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(qp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, upper, valeig, wantz + character :: order, vect + integer(ilp) :: i, iinfo, indd, inde, indee, indibl, indisp, indiwk, indrwk, indwrk, & + itmp1, j, jj, nsplit + real(qp) :: tmp1 + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ka<0 ) then + info = -5 + else if( kb<0 .or. kb>ka ) then + info = -6 + else if( ldab0 .and. vu<=vl )info = -14 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -15 + else if ( iun ) then + info = -16 + end if + end if + end if + if( info==0) then + if( ldz<1 .or. ( wantz .and. ldz ZHBTRD: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_whbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldq, n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: ab(ldab,*), q(ldq,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: initq, upper, wantq + integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, & + jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt + real(qp) :: abst + complex(qp) :: t, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min + ! Executable Statements + ! test the input parameters + initq = stdlib_lsame( vect, 'V' ) + wantq = initq .or. stdlib_lsame( vect, 'U' ) + upper = stdlib_lsame( uplo, 'U' ) + kd1 = kd + 1 + kdm1 = kd - 1 + incx = ldab - 1 + iqend = 1 + info = 0 + if( .not.wantq .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldab1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the upper triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( kd1, 1 ) = real( ab( kd1, 1 ),KIND=qp) + loop_90: do i = 1, n - 2 + ! reduce i-th row of matrix to tridiagonal form + loop_80: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_wlargv( nr, ab( 1, j1-1 ), inca, work( j1 ),kd1, d( j1 ), & + kd1 ) + ! apply rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_wlartv or stdlib_wrot is used + if( nr>=2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_wlartv( nr, ab( l+1, j1-1 ), inca,ab( l, j1 ), inca, & + d( j1 ),work( j1 ), kd1 ) + end do + else + jend = j1 + ( nr-1 )*kd1 + do jinc = j1, jend, kd1 + call stdlib_wrot( kdm1, ab( 2, jinc-1 ), 1,ab( 1, jinc ), 1, d( & + jinc ),work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+k-1) + ! within the band + call stdlib_wlartg( ab( kd-k+3, i+k-2 ),ab( kd-k+2, i+k-1 ), d( i+k-& + 1 ),work( i+k-1 ), temp ) + ab( kd-k+3, i+k-2 ) = temp + ! apply rotation from the right + call stdlib_wrot( k-3, ab( kd-k+4, i+k-2 ), 1,ab( kd-k+3, i+k-1 ), 1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_wlar2v( nr, ab( kd1, j1-1 ), ab( kd1, j1 ),ab( kd, & + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the left + if( nr>0 ) then + call stdlib_wlacgv( nr, work( j1 ), kd1 ) + if( 2*kd-1n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_wlartv( nrt, ab( kd-l, j1+l ), inca,ab( kd-& + l+1, j1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do jin = j1, j1end, kd1 + call stdlib_wrot( kd-1, ab( kd-1, jin+1 ), incx,ab( kd, jin+1 )& + , incx,d( jin ), work( jin ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_wrot( lend, ab( kd-1, last+1 ), incx,ab( kd, & + last+1 ), incx, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_wrot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + conjg( work( j ) ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_wrot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), conjg( & + work( j ) ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j-1,j+kd) outside the band + ! and store it in work + work( j+kd ) = work( j )*ab( 1, j+kd ) + ab( 1, j+kd ) = d( j )*ab( 1, j+kd ) + end do + end do loop_80 + end do loop_90 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( kd, i+1 ) + abst = abs( t ) + ab( kd, i+1 ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the lower triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( 1, 1 ) = real( ab( 1, 1 ),KIND=qp) + loop_210: do i = 1, n - 2 + ! reduce i-th column of matrix to tridiagonal form + loop_200: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_wlargv( nr, ab( kd1, j1-kd1 ), inca,work( j1 ), kd1, d( j1 )& + , kd1 ) + ! apply plane rotations from one side + ! dependent on the the number of diagonals either + ! stdlib_wlartv or stdlib_wrot is used + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_wlartv( nr, ab( kd1-l, j1-kd1+l ), inca,ab( kd1-l+1, & + j1-kd1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + jend = j1 + kd1*( nr-1 ) + do jinc = j1, jend, kd1 + call stdlib_wrot( kdm1, ab( kd, jinc-kd ), incx,ab( kd1, jinc-kd )& + , incx,d( jinc ), work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i+k-1,i) + ! within the band + call stdlib_wlartg( ab( k-1, i ), ab( k, i ),d( i+k-1 ), work( i+k-1 & + ), temp ) + ab( k-1, i ) = temp + ! apply rotation from the left + call stdlib_wrot( k-3, ab( k-2, i+1 ), ldab-1,ab( k-1, i+1 ), ldab-1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_wlar2v( nr, ab( 1, j1-1 ), ab( 1, j1 ),ab( 2, j1-1 ),& + inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_wlartv or stdlib_wrot is used + if( nr>0 ) then + call stdlib_wlacgv( nr, work( j1 ), kd1 ) + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + if( j2+l>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_wlartv( nrt, ab( l+2, j1-1 ), inca,ab( l+1,& + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do j1inc = j1, j1end, kd1 + call stdlib_wrot( kdm1, ab( 3, j1inc-1 ), 1,ab( 2, j1inc ), 1, & + d( j1inc ),work( j1inc ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_wrot( lend, ab( 3, last-1 ), 1,ab( 2, last ),& + 1, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_wrot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_wrot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j+kd,j-1) outside the + ! band and store it in work + work( j+kd ) = work( j )*ab( kd1, j ) + ab( kd1, j ) = d( j )*ab( kd1, j ) + end do + end do loop_200 + end do loop_210 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( 2, i ) + abst = abs( t ) + ab( 2, i ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i ZHECON: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_whecon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_whetrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_whecon + + !> ZHECON_ROOK: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_whecon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_whetrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_whecon_rook + + !> ZHEEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_wheequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(qp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'ZHEEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_qp / s( j ) + end do + tol = one / sqrt( 2.0_qp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + real( s( i )*work( i ),KIND=qp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_wlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( real( work( i ),KIND=qp) - t*si ) + c0 = -(t*si)*si + 2 * real( work( i ),KIND=qp) * si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + real( work( i ),KIND=qp) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_qlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_qlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_wheequb + + !> ZHEEV: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. + + subroutine stdlib_wheev( jobz, uplo, n, a, lda, w, work, lwork, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_wlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_whetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_whetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_wungtr to generate the unitary matrix, then call stdlib_wsteqr. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + call stdlib_wungtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + indwrk = inde + n + call stdlib_wsteqr( jobz, n, w, rwork( inde ), a, lda,rwork( indwrk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_wheev + + !> ZHEEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_wheevd( jobz, uplo, n, a, lda, w, work, lwork, rwork,lrwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwk2, indwrk, iscale, liopt, & + liwmin, llrwk, llwork, llwrk2, lopt, lropt, lrwmin, lwmin + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_wlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_whetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + indrwk = inde + n + indwk2 = indwrk + n*n + llwork = lwork - indwrk + 1 + llwrk2 = lwork - indwk2 + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_whetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_wstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_wunmtr to multiply it to the + ! householder transformations represented as householder vectors in + ! a. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + call stdlib_wstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, rwork( indrwk ), llrwk,iwork, liwork, info ) + call stdlib_wunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_wlacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lopt + rwork( 1 ) = lropt + iwork( 1 ) = liopt + return + end subroutine stdlib_wheevd + + !> ZHEEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> ZHEEVR first reduces the matrix A to tridiagonal form T with a call + !> to ZHETRD. Then, whenever possible, ZHEEVR calls ZSTEMR to compute + !> eigenspectrum using Relatively Robust Representations. ZSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see ZSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : ZHEEVR calls ZSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of ZSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_wheevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,rwork, lrwork, iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lrwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indibl, indifl, indisp, indiwo, indrd, indrdd, & + indre, indree, indrwk, indtau, indwk, indwkn, iscale, itmp1, j, jj, liwmin, llwork, & + llrwork, llwrkn, lrwmin, lwkopt, lwmin, nb, nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( lrwork==-1 ) .or.( liwork==-1 ) ) + lrwmin = max( 1, 24*n ) + liwmin = max( 1, 10*n ) + lwmin = max( 1, 2*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=qp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=qp) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_wlansy( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_wdscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_wdscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_qsterf or stdlib_wstemr fail. + ! work(indtau:indtau+n-1) stores the complex scalar factors of the + ! elementary reflectors used in stdlib_whetrd. + indtau = 1 + ! indwk is the starting offset of the remaining complex workspace, + ! and llwork is the remaining complex workspace size. + indwk = indtau + n + llwork = lwork - indwk + 1 + ! rwork(indrd:indrd+n-1) stores the real tridiagonal's diagonal + ! entries. + indrd = 1 + ! rwork(indre:indre+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_whetrd. + indre = indrd + n + ! rwork(indrdd:indrdd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_wstemr (the stdlib_qsterf path copies the diagonal to w). + indrdd = indre + n + ! rwork(indree:indree+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_qsterf and stdlib_wstemr. + indree = indrdd + n + ! indrwk is the starting offset of the left-over real workspace, and + ! llrwork is the remaining workspace size. + indrwk = indree + n + llrwork = lrwork - indrwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_qstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_qstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_qstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_whetrd to reduce hermitian matrix to tridiagonal form. + call stdlib_whetrd( uplo, n, a, lda, rwork( indrd ), rwork( indre ),work( indtau ), & + work( indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_qsterf or stdlib_wstemr and stdlib_wunmtr. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig.or.test ) .and. ( ieeeok==1 ) ) then + if( .not.wantz ) then + call stdlib_qcopy( n, rwork( indrd ), 1, w, 1 ) + call stdlib_qcopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_qsterf( n, w, rwork( indree ), info ) + else + call stdlib_qcopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_qcopy( n, rwork( indrd ), 1, rwork( indrdd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_wstemr( jobz, 'A', n, rwork( indrdd ),rwork( indree ), vl, vu, il, & + iu, m, w,z, ldz, n, isuppz, tryrac,rwork( indrwk ), llrwork,iwork, liwork, info ) + + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_wstemr. + if( wantz .and. info==0 ) then + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_wunmtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_wstein. + ! also call stdlib_qstebz and stdlib_wstein if stdlib_wstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indrd ), rwork( & + indre ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwo )& + , info ) + if( wantz ) then + call stdlib_wstein( n, rwork( indrd ), rwork( indre ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_wstein. + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_wunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHEEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_wheevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, llwork, lwkmin, lwkopt, nb, nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=qp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=qp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_wlanhe( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_wdscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_wdscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_whetrd to reduce hermitian matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_whetrd( uplo, n, a, lda, rwork( indd ), rwork( inde ),work( indtau ), work(& + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_qsterf or stdlib_wungtr and stdlib_wsteqr. if this fails for + ! some eigenvalue, then try stdlib_qstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_qcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_qsterf( n, w, rwork( indee ), info ) + else + call stdlib_wlacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_wungtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_wsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_wstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_wstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_wstein. + call stdlib_wunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwrk ), llwork, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHEGS2: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. + !> B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. + + pure subroutine stdlib_whegs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(qp) :: akk, bkk + complex(qp) :: ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZHEGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. + + pure subroutine stdlib_whegst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_whegs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_whegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_wtrsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, & + n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda ) + call stdlib_whemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_wher2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( & + k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_whemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_wtrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**h) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_whegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_wtrsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-& + kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda ) + call stdlib_whemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_wher2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, & + k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_whemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_wtrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**h + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_wtrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, & + b, ldb, a( 1, k ), lda ) + call stdlib_whemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_wher2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_whemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_wtrmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, & + kb, cone, b( k, k ), ldb,a( 1, k ), lda ) + call stdlib_whegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**h*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_wtrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,& + b, ldb, a( k, 1 ), lda ) + call stdlib_whemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_wher2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1 ), & + lda, b( k, 1 ), ldb,one, a, lda ) + call stdlib_whemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_wtrmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,& + cone, b( k, k ), ldb,a( k, 1 ), lda ) + call stdlib_whegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_whegst + + !> ZHEGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian and B is also + !> positive definite. + + subroutine stdlib_whegv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_wtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_wtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_whegv + + !> ZHEGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_whegvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lropt, lrwmin, lwmin + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else if( wantz ) then + lwmin = 2*n + n*n + lrwmin = 1 + 5*n + 2*n*n + liwmin = 3 + 5*n + else + lwmin = n + 1 + lrwmin = n + liwmin = 1 + end if + lopt = lwmin + lropt = lrwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda ZHEGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_whegvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_wtrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_wtrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_whegvx + + !> ZHERFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_wherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_whetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_whetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_whetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wherfs + + !> ZHESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_whesv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESV_AA: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**H * T * U, if UPLO = 'U', or + !> A = L * T * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is Hermitian and tridiagonal. The factored form + !> of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_whesv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_hetrf, lwkopt_hetrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> ZHETRF_RK is called to compute the factorization of a complex + !> Hermitian matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine ZHETRS_3. + + pure subroutine stdlib_whesv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESV_ROOK: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used + !> to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> ZHETRF_ROOK is called to compute the factorization of a complex + !> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling ZHETRS_ROOK (uses BLAS 2). + + pure subroutine stdlib_whesv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_whesvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: af(ldaf,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanhe( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_whecon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_whetrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_wherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZHESWAPR: applies an elementary permutation on the rows and the columns of + !> a hermitian matrix. + + pure subroutine stdlib_wheswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(qp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_wswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=conjg(a(i1+i,i2)) + a(i1+i,i2)=conjg(tmp) + end do + a(i1,i2)=conjg(a(i1,i2)) + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from 1 to i1-1 + call stdlib_wswap ( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=conjg(a(i2,i1+i)) + a(i2,i1+i)=conjg(tmp) + end do + a(i2,i1)=conjg(a(i2,i1)) + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_wheswapr + + !> ZHETD2: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_whetd2( uplo, n, a, lda, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(qp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U') + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZHETF2: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_whetf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(qp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(qp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) .or. stdlib_qisnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=qp) + else + ! ============================================================ + ! test for interchange + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine only rowmax. + jmax = imax + stdlib_iwamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=qp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + a( kp, kk ) = conjg( a( kp, kk ) ) + r1 = real( a( kk, kk ),KIND=qp) + a( kk, kk ) = real( a( kp, kp ),KIND=qp) + a( kp, kp ) = r1 + if( kstep==2 ) then + a( k, k ) = real( a( k, k ),KIND=qp) + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + a( k, k ) = real( a( k, k ),KIND=qp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=qp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( a( k, k ),KIND=qp) + call stdlib_wher( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wdscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_qlapy2( real( a( k-1, k ),KIND=qp),aimag( a( k-1, k ) ) ) + + d22 = real( a( k-1, k-1 ),KIND=qp) / d + d11 = real( a( k, k ),KIND=qp) / d + tt = one / ( d11*d22-one ) + d12 = a( k-1, k ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = d*( d22*a( j, k )-d12*a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*conjg( wk ) -a( i, k-1 )*conjg( & + wkm1 ) + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + a( j, j ) = cmplx( real( a( j, j ),KIND=qp), zero,KIND=qp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 50 continue + ! if k > n, exit from loop + if( k>n )go to 90 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine only rowmax. + jmax = k - 1 + stdlib_iwamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=qp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZHETF2_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_whetf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*) + ! ====================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(qp) :: absakk, alpha, colmax, d, d11, d22, r1, dtemp, rowmax, tt, sfmin + complex(qp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=qp) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=qp) )1 )call stdlib_wswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=qp) + a( k, k ) = real( a( p, p ),KIND=qp) + a( p, p ) = r1 + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=qp) + a( kk, kk ) = real( a( kp, kp ),KIND=qp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=qp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=qp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=qp) + call stdlib_wher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wdscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=qp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_qlapy2( real( a( k-1, k ),KIND=qp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=qp) + d22 = real( a( k-1, k-1 ) / d,KIND=qp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=qp), zero,KIND=qp) + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**h using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=qp) )1 )call stdlib_wswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the trailing submatrix a(k:n,k:n) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=qp) + if( kstep==2 )a( k+1, k+1 ) = real( a( k+1, k+1 ),KIND=qp) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of a now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=qp) + call stdlib_wher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_wdscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=qp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZHETF2_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_whetf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ====================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(qp) :: absakk, alpha, colmax, d, d11, d22, r1, dtemp, rowmax, tt, sfmin + complex(qp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=qp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=qp) )1 )call stdlib_wswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=qp) + a( k, k ) = real( a( p, p ),KIND=qp) + a( p, p ) = r1 + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the leading submatrix a(1:k,1:k) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp>1 )call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=qp) + a( kk, kk ) = real( a( kp, kp ),KIND=qp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=qp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=qp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=qp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=qp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=qp) + call stdlib_wher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wdscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=qp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_qlapy2( real( a( k-1, k ),KIND=qp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=qp) + d22 = real( a( k-1, k-1 ) / d,KIND=qp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=qp), zero,KIND=qp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=qp) )=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=qp) + call stdlib_wher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_wdscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=qp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZHETRD: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_whetrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, iws, j, kk, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb ZHETRD_HB2ST: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_whetrd_hb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, lhous, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: stage1, uplo, vect + integer(ilp), intent(in) :: n, kd, ldab, lhous, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: ab(ldab,*) + complex(qp), intent(out) :: hous(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: rzero = 0.0e+0_qp + + + ! Local Scalars + logical(lk) :: lquery, wantq, upper, afters1 + integer(ilp) :: i, m, k, ib, sweepid, myid, shift, stt, st, ed, stind, edind, & + blklastind, colpt, thed, stepercol, grsiz, thgrsiz, thgrnb, thgrid, nbtiles, ttype, & + tid, nthreads, debug, abdpos, abofdpos, dpos, ofdpos, awpos, inda, indw, apos, sizea, & + lda, indv, indtau, sizev, sizetau, ldv, lhmin, lwmin + real(qp) :: abstmp + complex(qp) :: tmp + ! Intrinsic Functions + intrinsic :: min,max,ceiling,real + ! Executable Statements + ! determine the minimal workspace size required. + ! test the input parameters + debug = 0 + info = 0 + afters1 = stdlib_lsame( stage1, 'Y' ) + wantq = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) .or. ( lhous==-1 ) + ! determine the block size, the workspace size and the hous size. + ib = stdlib_ilaenv2stage( 2, 'ZHETRD_HB2ST', vect, n, kd, -1, -1 ) + lhmin = stdlib_ilaenv2stage( 3, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 ) + lwmin = stdlib_ilaenv2stage( 4, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 ) + if( .not.afters1 .and. .not.stdlib_lsame( stage1, 'N' ) ) then + info = -1 + else if( .not.stdlib_lsame( vect, 'N' ) ) then + info = -2 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab<(kd+1) ) then + info = -7 + else if( lhoused ) exit + loop_120: do m = 1, stepercol + st = stt + loop_130: do sweepid = st, ed + loop_140: do k = 1, grsiz + myid = (i-sweepid)*(stepercol*grsiz)+ (m-1)*grsiz + k + if ( myid==1 ) then + ttype = 1 + else + ttype = mod( myid, 2 ) + 2 + endif + if( ttype==2 ) then + colpt = (myid/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + blklastind = colpt + else + colpt = ((myid+1)/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + if( ( stind>=edind-1 ).and.( edind==n ) ) then + blklastind = n + else + blklastind = 0 + endif + endif + ! call the kernel + !$ if( ttype/=1 ) then + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(in:WORK(MYID-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + !$ call stdlib_whb2st_kernels( uplo, wantq, ttype,stind, edind, & + !$ sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + !$ indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ else + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + call stdlib_whb2st_kernels( uplo, wantq, ttype,stind, edind, & + sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ endif + if ( blklastind>=(n-1) ) then + stt = stt + 1 + exit + endif + end do loop_140 + end do loop_130 + end do loop_120 + end do loop_110 + end do loop_100 + !$OMP END MASTER + !$OMP END PARALLEL + ! copy the diagonal from a to d. note that d is real thus only + ! the real part is needed, the imaginary part should be czero. + do i = 1, n + d( i ) = real( work( dpos+(i-1)*lda ),KIND=qp) + end do + ! copy the off diagonal from a to e. note that e is real thus only + ! the real part is needed, the imaginary part should be czero. + if( upper ) then + do i = 1, n-1 + e( i ) = real( work( ofdpos+i*lda ),KIND=qp) + end do + else + do i = 1, n-1 + e( i ) = real( work( ofdpos+(i-1)*lda ),KIND=qp) + end do + endif + hous( 1 ) = lhmin + work( 1 ) = lwmin + return + end subroutine stdlib_whetrd_hb2st + + !> ZHETRD_HE2HB: reduces a complex Hermitian matrix A to complex Hermitian + !> band-diagonal form AB by a unitary similarity transformation: + !> Q**H * A * Q = AB. + + pure subroutine stdlib_whetrd_he2hb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: rone = 1.0e+0_qp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'ZHETRD_HE2HB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda ZHETRF: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_whetrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlahef( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_whetf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_wlahef; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlahef( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_whetf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_whetrf + + !> ZHETRF_AA: computes the factorization of a complex hermitian matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**H*T*U or A = L*T*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a hermitian tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_whetrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_wlahef_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_wswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j, j+1 ) ) + a( j, j+1 ) = cone + call stdlib_wcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_wscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_wgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',1, mj, jb+1,-cone,& + a( j1-k2, j3 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j3, j3 ), lda ) + + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_wgemm + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-& + cone, a( j1-k2, j2 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j2, j3 ), lda & + ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_wcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**h using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_wcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_wlahef; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_wlahef_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_wswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j+1, j ) ) + a( j+1, j ) = cone + call stdlib_wcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_wscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_wgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',mj, 1, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), & + lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block column with stdlib_wgemm + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j3+1, nj, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda & + ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_wcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_whetrf_aa + + !> ZHETRF_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_whetrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlahef_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_whetf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlahef_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_whetf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_wswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_whetrf_rk + + !> ZHETRF_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_whetrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlahef_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_whetf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_wlahef_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlahef_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_whetf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_whetrf_rook + + !> ZHETRI: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> ZHETRF. + + pure subroutine stdlib_whetri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(qp) :: ak, akp1, d, t + complex(qp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=qp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_wdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=qp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=qp) / t + akp1 = real( a( k+1, k+1 ),KIND=qp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_wdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=qp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_wdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_wcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_wdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=qp) + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=qp) + ! compute column k of the inverse. + if( k ZHETRI_ROOK: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> ZHETRF_ROOK. + + pure subroutine stdlib_whetri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(qp) :: ak, akp1, d, t + complex(qp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 70 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=qp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_wdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=qp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=qp) / t + akp1 = real( a( k+1, k+1 ),KIND=qp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_wdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=qp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_wdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_wcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_whemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_wdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=qp) + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k,1:k) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1) in the leading submatrix a(k+1:n,k+1:n) + ! (1) interchange rows and columns k and -ipiv(k) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + ! (2) interchange rows and columns k+1 and -ipiv(k+1) + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 70 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 80 continue + ! if k < 1, exit from loop. + if( k<1 )go to 120 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=qp) + ! compute column k of the inverse. + if( k ZHETRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF. + + pure subroutine stdlib_whetrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(qp) :: s + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=qp) / real( a( k, k ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZHETRS2: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF and converted by ZSYCONV. + + pure subroutine stdlib_whetrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(qp) :: s + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_wtrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=qp) / real( a( i, i ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_wtrsm('L','U','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_wswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_wtrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=qp) / real( a( i, i ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / conjg( akm1k ) + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / conjg( akm1k ) + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**h \ b) -> b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_wtrsm('L','L','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_wsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_whetrs2 + + !> ZHETRS_3: solves a system of linear equations A * X = B with a complex + !> Hermitian matrix A using the factorization computed + !> by ZHETRF_RK or ZHETRF_BK: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_whetrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), e(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(qp) :: s + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_wtrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=qp) / real( a( i, i ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'U', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=qp) / real( a( i, i ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( i, 1 ), ldb ) + else if( i b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_wtrsm('L', 'L', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_whetrs_3 + + !> ZHETRS_AA: solves a system of linear equations A*X = B with a complex + !> hermitian matrix A using the factorization A = U**H*T*U or + !> A = L*T*L**H computed by ZHETRF_AA. + + pure subroutine stdlib_whetrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**h \ b -> b [ (u**h \p**t * b) ] + call stdlib_wtrsm( 'L', 'U', 'C', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb ) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**h \p**t * b) ] + call stdlib_wlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1 ) + if( n>1 ) then + call stdlib_wlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1) + call stdlib_wlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_wlacgv( n-1, work( 1 ), 1 ) + end if + call stdlib_wgtsv( n, nrhs, work(1), work(n), work(2*n), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**h \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b(2, 1), & + ldb) + ! pivot, p * b [ p * (u**h \ (t \ (u \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**h. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b(2, 1), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_wlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_wlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1) + call stdlib_wlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1) + call stdlib_wlacgv( n-1, work( 2*n ), 1 ) + end if + call stdlib_wgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with l**h + if( n>1 ) then + ! compute l**h \ b -> b [ l**h \ (t \ (l \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'L', 'C', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b [ p * (l**h \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_whetrs_aa + + !> ZHETRS_ROOK: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF_ROOK. + + pure subroutine stdlib_whetrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(qp) :: s + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=qp) / real( a( k, k ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k), then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k), then k+1 and -ipiv(k+1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_wswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k Level 3 BLAS like routine for C in RFP Format. + !> ZHFRK: performs one of the Hermitian rank--k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n Hermitian + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + + pure subroutine stdlib_whfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, n + character, intent(in) :: trans, transr, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: c(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, normaltransr, nisodd, notrans + integer(ilp) :: info, nrowa, j, nk, n1, n2 + complex(qp) :: calpha, cbeta + ! Intrinsic Functions + intrinsic :: max,cmplx + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( notrans ) then + nrowa = n + else + nrowa = k + end if + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( lda ZHGEQZ: computes the eigenvalues of a complex matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the single-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a complex matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by ZGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices and S and P are upper triangular. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced + !> the matrix pair (A,B) to generalized Hessenberg form, then the output + !> matrices Q1*Q and Z1*Z are the unitary factors from the generalized + !> Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) + !> (equivalently, of (A,B)) are computed as a pair of complex values + !> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an + !> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> The values of alpha and beta for the i-th eigenvalue can be read + !> directly from the generalized Schur form: alpha = S(i,i), + !> beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_whgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alpha, beta, q, ldq,& + z, ldz, work, lwork,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(out) :: alpha(*), beta(*), work(*) + complex(qp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + + + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(qp) :: absb, anorm, ascale, atol, bnorm, bscale, btol, c, safmin, temp, temp2, & + tempr, ulp + complex(qp) :: abi22, ad11, ad12, ad21, ad22, ctemp, ctemp2, ctemp3, eshift, s, shift, & + signbc, u12, x, abi12, y + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=qp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ilschr = .true. + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + ilq = .true. + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + ilz = .true. + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihisafmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_wscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_wscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_wscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_wscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! if ihi < ilo, skip qz steps + if( ihimaxit )go to 180 + ! split the matrix if possible. + ! two tests: + ! 1: h(j,j-1)=0 or j=ilo + ! 2: t(j,j)=0 + ! special case: j=ilast + if( ilast==ilo ) then + go to 60 + else + if( abs1( h( ilast, ilast-1 ) )<=max( safmin, ulp*(abs1( h( ilast, ilast ) ) + & + abs1( h( ilast-1, ilast-1 )) ) ) ) then + h( ilast, ilast-1 ) = czero + go to 60 + end if + end if + if( abs( t( ilast, ilast ) )<=max( safmin, ulp*(abs( t( ilast - 1, ilast ) ) + abs( & + t( ilast-1, ilast-1 )) ) ) ) then + t( ilast, ilast ) = czero + go to 50 + end if + ! general case: j ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 60 + else + ifirst = jch + 1 + go to 70 + end if + end if + t( jch+1, jch+1 ) = czero + end do + go to 50 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + ctemp = t( jch, jch+1 ) + call stdlib_wlartg( ctemp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = czero + if( jchsafmin ) then + signbc = conjg( t( ilast, ilast ) / absb ) + t( ilast, ilast ) = absb + if( ilschr ) then + call stdlib_wscal( ilast-ifrstm, signbc, t( ifrstm, ilast ), 1 ) + call stdlib_wscal( ilast+1-ifrstm, signbc, h( ifrstm, ilast ),1 ) + else + call stdlib_wscal( 1, signbc, h( ilast, ilast ), 1 ) + end if + if( ilz )call stdlib_wscal( n, signbc, z( 1, ilast ), 1 ) + else + t( ilast, ilast ) = czero + end if + alpha( ilast ) = h( ilast, ilast ) + beta( ilast ) = t( ilast, ilast ) + ! go to next block -- exit if finished. + ilast = ilast - 1 + if( ilastilast )ifrstm = ilo + end if + go to 160 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 70 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute the shift. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10/=iiter ) then + ! the wilkinson shift (aep p.512_qp), i.e., the eigenvalue of + ! the bottom-right 2x2 block of a inv(b) which is nearest to + ! the bottom-right element. + ! we factor b as u*d, where u has unit diagonals, and + ! compute (a*inv(d))*inv(u). + u12 = ( bscale*t( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + abi22 = ad22 - u12*ad21 + abi12 = ad12 - u12*ad11 + shift = abi22 + ctemp = sqrt( abi12 )*sqrt( ad21 ) + temp = abs1( ctemp ) + if( ctemp/=zero ) then + x = half*( ad11-shift ) + temp2 = abs1( x ) + temp = max( temp, abs1( x ) ) + y = temp*sqrt( ( x / temp )**2+( ctemp / temp )**2 ) + if( temp2>zero ) then + if( real( x / temp2,KIND=qp)*real( y,KIND=qp)+aimag( x / temp2 )*aimag( y )& + safmin ) & + then + eshift = eshift + ( ascale*h( ilast,ilast ) )/( bscale*t( ilast, ilast ) ) + + else + eshift = eshift + ( ascale*h( ilast,ilast-1 ) )/( bscale*t( ilast-1, ilast-1 )& + ) + end if + shift = eshift + end if + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + ctemp = ascale*h( j, j ) - shift*( bscale*t( j, j ) ) + temp = abs1( ctemp ) + temp2 = ascale*abs1( h( j+1, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + ctemp = h( j, j-1 ) + call stdlib_wlartg( ctemp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = czero + end if + do jc = j, ilastm + ctemp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -conjg( s )*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = ctemp + ctemp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -conjg( s )*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = ctemp2 + end do + if( ilq ) then + do jr = 1, n + ctemp = c*q( jr, j ) + conjg( s )*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = ctemp + end do + end if + ctemp = t( j+1, j+1 ) + call stdlib_wlartg( ctemp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = czero + do jr = ifrstm, min( j+2, ilast ) + ctemp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -conjg( s )*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = ctemp + end do + do jr = ifrstm, j + ctemp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -conjg( s )*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = ctemp + end do + if( ilz ) then + do jr = 1, n + ctemp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -conjg( s )*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = ctemp + end do + end if + end do loop_150 + 160 continue + end do loop_170 + ! drop-through = non-convergence + 180 continue + info = ilast + go to 210 + ! successful completion of all qz steps + 190 continue + ! set eigenvalues 1:ilo-1 + do j = 1, ilo - 1 + absb = abs( t( j, j ) ) + if( absb>safmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_wscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_wscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_wscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_wscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! normal termination + info = 0 + ! exit (other than argument error) -- return optimal workspace size + 210 continue + work( 1 ) = cmplx( n,KIND=qp) + return + end subroutine stdlib_whgeqz + + !> ZHPCON: estimates the reciprocal of the condition number of a complex + !> Hermitian packed matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_whpcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_whptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_whpcon + + !> ZHPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix in packed storage. + + subroutine stdlib_whpev( jobz, uplo, n, ap, w, z, ldz, work, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_wdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_whptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + call stdlib_whptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_wupgtr to generate the orthogonal matrix, then call stdlib_wsteqr. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + indwrk = indtau + n + call stdlib_wupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + indrwk = inde + n + call stdlib_wsteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_whpev + + !> ZHPEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_whpevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,rwork, lrwork, iwork, & + liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale, liwmin, llrwk, & + llwrk, lrwmin, lwmin + real(qp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_wdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_whptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + indrwk = inde + n + indwrk = indtau + n + llwrk = lwork - indwrk + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_whptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_qsterf. for eigenvectors, first call + ! stdlib_wupgtr to generate the orthogonal matrix, then call stdlib_wstedc. + if( .not.wantz ) then + call stdlib_qsterf( n, w, rwork( inde ), info ) + else + call stdlib_wstedc( 'I', n, w, rwork( inde ), z, ldz, work( indwrk ),llwrk, rwork( & + indrwk ), llrwk, iwork, liwork,info ) + call stdlib_wupmtr( 'L', uplo, 'N', n, n, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_whpevd + + !> ZHPEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A in packed storage. + !> Eigenvalues/vectors can be selected by specifying either a range of + !> values or a range of indices for the desired eigenvalues. + + subroutine stdlib_whpevx( jobz, range, uplo, n, ap, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, rwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, nsplit + real(qp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( ap( 1 ),KIND=qp) ) then + m = 1 + w( 1 ) = real( ap( 1 ),KIND=qp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + eps = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_wlanhp( 'M', uplo, n, ap, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_wdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_whptrd to reduce hermitian packed matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + call stdlib_whptrd( uplo, n, ap, rwork( indd ), rwork( inde ),work( indtau ), iinfo ) + + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_qsterf or stdlib_wupgtr and stdlib_wsteqr. if this fails + ! for some eigenvalue, then try stdlib_qstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_qcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_qsterf( n, w, rwork( indee ), info ) + else + call stdlib_wupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_qcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_wsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_qstebz and, if eigenvectors are desired, stdlib_wstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_qstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_wstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_wstein. + indwrk = indtau + n + call stdlib_wupmtr( 'L', uplo, 'N', n, m, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_qscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHPGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by ZPPTRF. + + pure subroutine stdlib_whpgst( itype, uplo, n, ap, bp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, n + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(in) :: bp(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk + real(qp) :: ajj, akk, bjj, bkk + complex(qp) :: ct + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPGST', -info ) + return + end if + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + ! j1 and jj are the indices of a(1,j) and a(j,j) + jj = 0 + do j = 1, n + j1 = jj + 1 + jj = jj + j + ! compute the j-th column of the upper triangle of a + ap( jj ) = real( ap( jj ),KIND=qp) + bjj = real( bp( jj ),KIND=qp) + call stdlib_wtpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1 & + ) + call stdlib_whpmv( uplo, j-1, -cone, ap, bp( j1 ), 1, cone,ap( j1 ), 1 ) + + call stdlib_wdscal( j-1, one / bjj, ap( j1 ), 1 ) + ap( jj ) = ( ap( jj )-stdlib_wdotc( j-1, ap( j1 ), 1, bp( j1 ),1 ) ) / & + bjj + end do + else + ! compute inv(l)*a*inv(l**h) + ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1) + kk = 1 + do k = 1, n + k1k1 = kk + n - k + 1 + ! update the lower triangle of a(k:n,k:n) + akk = real( ap( kk ),KIND=qp) + bkk = real( bp( kk ),KIND=qp) + akk = akk / bkk**2 + ap( kk ) = akk + if( k ZHPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian, stored in packed format, + !> and B is also positive definite. + + subroutine stdlib_whpgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, n + ! Array Arguments + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*), bp(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: trans + integer(ilp) :: j, neig + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_wtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_wtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_whpgv + + !> ZHPGVD: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_whpgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*), bp(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: j, liwmin, lrwmin, lwmin, neig + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_wtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_wtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_whpgvd + + !> ZHPGVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. Eigenvalues and eigenvectors can be selected by + !> specifying either a range of values or a range of indices for the + !> desired eigenvalues. + + subroutine stdlib_whpgvx( itype, jobz, range, uplo, n, ap, bp, vl, vu,il, iu, abstol, m, w, & + z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(out) :: rwork(*), w(*) + complex(qp), intent(inout) :: ap(*), bp(*) + complex(qp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: alleig, indeig, upper, valeig, wantz + character :: trans + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else + if( valeig ) then + if( n>0 .and. vu<=vl ) then + info = -9 + end if + else if( indeig ) then + if( il<1 ) then + info = -10 + else if( iun ) then + info = -11 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, m + call stdlib_wtpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, m + call stdlib_wtpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_whpgvx + + !> ZHPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_whprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_whptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_whptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_whptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_whprfs + + !> ZHPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is Hermitian and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_whpsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZHPSVX: uses the diagonal pivoting factorization A = U*D*U**H or + !> A = L*D*L**H to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_whpsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(inout) :: afp(*) + complex(qp), intent(in) :: ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_whpcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_whptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_whprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZHPTRD: reduces a complex Hermitian matrix A stored in packed form to + !> real symmetric tridiagonal form T by a unitary similarity + !> transformation: Q**H * A * Q = T. + + pure subroutine stdlib_whptrd( uplo, n, ap, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, i1, i1i1, ii + complex(qp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRD', -info ) + return + end if + ! quick return if possible + if( n<=0 )return + if( upper ) then + ! reduce the upper triangle of a. + ! i1 is the index in ap of a(1,i+1). + i1 = n*( n-1 ) / 2 + 1 + ap( i1+n-1 ) = real( ap( i1+n-1 ),KIND=qp) + do i = n - 1, 1, -1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(1:i-1,i+1) + alpha = ap( i1+i-1 ) + call stdlib_wlarfg( i, alpha, ap( i1 ), 1, taui ) + e( i ) = real( alpha,KIND=qp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(1:i,1:i) + ap( i1+i-1 ) = cone + ! compute y := tau * a * v storing y in tau(1:i) + call stdlib_whpmv( uplo, i, taui, ap, ap( i1 ), 1, czero, tau,1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_wdotc( i, tau, 1, ap( i1 ), 1 ) + call stdlib_waxpy( i, alpha, ap( i1 ), 1, tau, 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_whpr2( uplo, i, -cone, ap( i1 ), 1, tau, 1, ap ) + end if + ap( i1+i-1 ) = e( i ) + d( i+1 ) = real( ap( i1+i ),KIND=qp) + tau( i ) = taui + i1 = i1 - i + end do + d( 1 ) = real( ap( 1 ),KIND=qp) + else + ! reduce the lower triangle of a. ii is the index in ap of + ! a(i,i) and i1i1 is the index of a(i+1,i+1). + ii = 1 + ap( 1 ) = real( ap( 1 ),KIND=qp) + do i = 1, n - 1 + i1i1 = ii + n - i + 1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(i+2:n,i) + alpha = ap( ii+1 ) + call stdlib_wlarfg( n-i, alpha, ap( ii+2 ), 1, taui ) + e( i ) = real( alpha,KIND=qp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(i+1:n,i+1:n) + ap( ii+1 ) = cone + ! compute y := tau * a * v storing y in tau(i:n-1) + call stdlib_whpmv( uplo, n-i, taui, ap( i1i1 ), ap( ii+1 ), 1,czero, tau( i ),& + 1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_wdotc( n-i, tau( i ), 1, ap( ii+1 ),1 ) + call stdlib_waxpy( n-i, alpha, ap( ii+1 ), 1, tau( i ), 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_whpr2( uplo, n-i, -cone, ap( ii+1 ), 1, tau( i ), 1,ap( i1i1 ) ) + + end if + ap( ii+1 ) = e( i ) + d( i ) = real( ap( ii ),KIND=qp) + tau( i ) = taui + ii = i1i1 + end do + d( n ) = real( ap( ii ),KIND=qp) + end if + return + end subroutine stdlib_whptrd + + !> ZHPTRF: computes the factorization of a complex Hermitian packed + !> matrix A using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_whptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(qp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(qp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**h using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc+k-1 ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_iwamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=qp) + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_iwamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc+imax-1 ),KIND=qp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_wswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = conjg( ap( knc+j-1 ) ) + ap( knc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = t + end do + ap( kx+kk-1 ) = conjg( ap( kx+kk-1 ) ) + r1 = real( ap( knc+kk-1 ),KIND=qp) + ap( knc+kk-1 ) = real( ap( kpc+kp-1 ),KIND=qp) + ap( kpc+kp-1 ) = r1 + if( kstep==2 ) then + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=qp) + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + else + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=qp) + if( kstep==2 )ap( kc-1 ) = real( ap( kc-1 ),KIND=qp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( ap( kc+k-1 ),KIND=qp) + call stdlib_whpr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_wdscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_qlapy2( real( ap( k-1+( k-1 )*k / 2 ),KIND=qp),aimag( ap( k-1+( & + k-1 )*k / 2 ) ) ) + d22 = real( ap( k-1+( k-2 )*( k-1 ) / 2 ),KIND=qp) / d + d11 = real( ap( k+( k-1 )*k / 2 ),KIND=qp) / d + tt = one / ( d11*d22-one ) + d12 = ap( k-1+( k-1 )*k / 2 ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-conjg( d12 )*ap( j+( k-1 )*k & + / 2 ) ) + wk = d*( d22*ap( j+( k-1 )*k / 2 )-d12*ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *conjg( wk ) -ap( i+( k-2 )*( k-1 ) / 2 )*conjg( wkm1 ) + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + ap( j+( j-1 )*j / 2 ) = cmplx( real( ap( j+( j-1 )*j / 2 ),KIND=qp), & + zero,KIND=qp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc ),KIND=qp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZHPTRI: computes the inverse of a complex Hermitian indefinite matrix + !> A in packed storage using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHPTRF. + + pure subroutine stdlib_whptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + real(qp) :: ak, akp1, d, t + complex(qp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = one / real( ap( kc+k-1 ),KIND=qp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_whpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_wdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=qp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( ap( kcnext+k-1 ) ) + ak = real( ap( kc+k-1 ),KIND=qp) / t + akp1 = real( ap( kcnext+k ),KIND=qp) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-one ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_whpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_wdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=qp) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_wdotc( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_wcopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_whpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -real( stdlib_wdotc( k-1, work, 1, ap( kcnext & + ),1 ),KIND=qp) + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_wswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = conjg( ap( kc+j-1 ) ) + ap( kc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = temp + end do + ap( kc+kp-1 ) = conjg( ap( kc+kp-1 ) ) + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = one / real( ap( kc ),KIND=qp) + ! compute column k of the inverse. + if( k ZHPTRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A stored in packed format using the factorization + !> A = U*D*U**H or A = L*D*L**H computed by ZHPTRF. + + pure subroutine stdlib_whptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + real(qp) :: s + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=qp) / real( ap( kc+k-1 ),KIND=qp) + call stdlib_wdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_wgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_wgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc+k ),& + 1, cone, b( k+1, 1 ), ldb ) + call stdlib_wlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a complex upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_whsein( side, eigsrc, initv, select, n, h, ldh, w, vl,ldvl, vr, ldvr, mm, & + m, work, rwork, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: h(ldh,*) + complex(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), w(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: rzero = 0.0e+0_qp + + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ks, ldwork + real(qp) :: eps3, hnorm, smlnum, ulp, unfl + complex(qp) :: cdum, wk + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==czero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_wlanhs( 'I', kr-kl+1, h( kl, kl ), ldh, rwork ) + if( stdlib_qisnan( hnorm ) ) then + info = -6 + return + else if( hnorm>rzero ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wk = w( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. cabs1( w( i )-wk )0 ) then + info = info + 1 + ifaill( ks ) = k + else + ifaill( ks ) = 0 + end if + do i = 1, kl - 1 + vl( i, ks ) = czero + end do + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_wlaein( .true., noinit, kr, h, ldh, wk, vr( 1, ks ),work, ldwork, & + rwork, eps3, smlnum, iinfo ) + if( iinfo>0 ) then + info = info + 1 + ifailr( ks ) = k + else + ifailr( ks ) = 0 + end if + do i = kr + 1, n + vr( i, ks ) = czero + end do + end if + ks = ks + 1 + end if + end do loop_100 + return + end subroutine stdlib_whsein + + !> ZHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + + pure subroutine stdlib_whseqr( job, compz, n, ilo, ihi, h, ldh, w, z, ldz,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + real(qp), parameter :: rzero = 0.0_qp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_wlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_wlahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + + ! Local Arrays + complex(qp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: real,cmplx,max,min + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = cmplx( real( max( 1, n ),KIND=qp), rzero,KIND=qp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldh1 )call stdlib_wcopy( ilo-1, h, ldh+1, w, 1 ) + if( ihinmin ) then + call stdlib_wlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, work, & + lwork, info ) + else + ! ==== small matrix ==== + call stdlib_wlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, info ) + + if( info>0 ) then + ! ==== a rare stdlib_wlahqr failure! stdlib_wlaqr0 sometimes succeeds + ! . when stdlib_wlahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_wlaqr0 directly. ==== + call stdlib_wlaqr0( wantt, wantz, n, ilo, kbot, h, ldh, w,ilo, ihi, z, ldz,& + work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_wlaqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_wlaqr0. ==== + call stdlib_wlacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = czero + call stdlib_wlaset( 'A', nl, nl-n, czero, czero, hl( 1, n+1 ),nl ) + call stdlib_wlaqr0( wantt, wantz, nl, ilo, kbot, hl, nl, w,ilo, ihi, z, & + ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_wlacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_wlaset( 'L', n-2, n-2, czero, & + czero, h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = cmplx( max( real( max( 1, n ),KIND=qp),real( work( 1 ),KIND=qp) ), & + rzero,KIND=qp) + end if + end subroutine stdlib_whseqr + + !> ZLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_wla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + complex(qp), intent(in) :: ab(ldab,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_wero = symb_wero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_wero = symb_wero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_wero = symb_wero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_wero = symb_wero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_wla_gbamv + + !> ZLA_GBRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(qp) function stdlib_wla_gbrcond_c( trans, n, kl, ku, ab,ldab, afb, ldafb, ipiv,c, & + capply, info, work,rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, kl, ku, ldab, ldafb + integer(ilp) :: kd, ke + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ab(ldab,*), afb(ldafb,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(in) :: c(*) + real(qp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, anorm, tmp + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_wla_gbrcond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab ZLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(qp) function stdlib_wla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + complex(qp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(qp) :: amax, umax, rpvgrw + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( cabs1( ab( kd+i-j, j ) ), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( cabs1( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_wla_gbrpvgrw = rpvgrw + end function stdlib_wla_gbrpvgrw + + !> ZLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_wla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + integer(ilp), intent(in) :: trans + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_wero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_wla_geamv + + !> ZLA_GERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(qp) function stdlib_wla_gercond_c( trans, n, a, lda, af,ldaf, ipiv, c, capply,info, & + work, rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(in) :: c(*) + real(qp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, anorm, tmp + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_wla_gercond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(qp) function stdlib_wla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: amax, umax, rpvgrw + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: max,min,abs,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( cabs1( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( cabs1( af( i, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_wla_gerpvgrw = rpvgrw + end function stdlib_wla_gerpvgrw + + !> ZLA_SYAMV performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_wla_heamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_wero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_wla_heamv + + !> ZLA_HERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(qp) function stdlib_wla_hercond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, work,& + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(in) :: c(*) + real(qp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(qp) :: ainvnm, anorm, tmp + logical(lk) :: up, upper + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_wla_hercond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_HERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(qp) function stdlib_wla_herpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(qp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper, lsame + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if (upper) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i,j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i,j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_wsytrs. + ! calls to stdlib_dswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ) , work( k+1 ) ) + end do + work(k) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_wla_herpvgrw = rpvgrw + end function stdlib_wla_herpvgrw + + !> ZLA_LIN_BERR: computes componentwise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the componentwise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_wla_lin_berr( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(qp), intent(in) :: ayb(n,nrhs) + real(qp), intent(out) :: berr(nrhs) + complex(qp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(qp) :: tmp,safe1 + integer(ilp) :: i, j + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + complex(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the cla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= zero) then + tmp = (safe1 + cabs1(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_qp (and if computed by cla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_wla_lin_berr + + !> ZLA_PORCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector + + real(qp) function stdlib_wla_porcond_c( uplo, n, a, lda, af,ldaf, c, capply, info,work, & + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(in) :: c(*) + real(qp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(qp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_wla_porcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(qp) function stdlib_wla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + real(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: amax, umax, rpvgrw + logical(lk) :: upper + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_qpotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_wla_porpvgrw = rpvgrw + end function stdlib_wla_porpvgrw + + !> ZLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_wla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + integer(ilp), intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + real(qp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_wero + real(qp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_qlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_wero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_wero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_wero = .true. + else + symb_wero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_wero = symb_wero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_wero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_wla_syamv + + !> ZLA_SYRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(qp) function stdlib_wla_syrcond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, work,& + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(in) :: c(*) + real(qp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(qp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_wla_syrcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(qp) function stdlib_wla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), af(ldaf,*) + real(qp), intent(out) :: work(*) + integer(ilp), intent(in) :: ipiv(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(qp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_wsytrs. + ! calls to stdlib_dswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_wla_syrpvgrw = rpvgrw + end function stdlib_wla_syrpvgrw + + !> ZLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_wla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: x(*), y(*) + complex(qp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + complex(qp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_wla_wwaddw + + !> ZLABRD: reduces the first NB rows and columns of a complex general + !> m by n matrix A to upper or lower real bidiagonal form by a unitary + !> transformation Q**H * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by ZGEBRD + + pure subroutine stdlib_wlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(qp), intent(out) :: d(*), e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_wlacgv( i-1, y( i, 1 ), ldy ) + call stdlib_wgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, a( i, 1 ),lda, y( i, 1 ), & + ldy, cone, a( i, i ), 1 ) + call stdlib_wlacgv( i-1, y( i, 1 ), ldy ) + call stdlib_wgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, x( i, 1 ),ldx, a( 1, i ), & + 1, cone, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_wlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=qp) + if( i ZLACGV: conjugates a complex vector of length N. + + pure subroutine stdlib_wlacgv( n, x, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ioff + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( incx==1 ) then + do i = 1, n + x( i ) = conjg( x( i ) ) + end do + else + ioff = 1 + if( incx<0 )ioff = 1 - ( n-1 )*incx + do i = 1, n + x( ioff ) = conjg( x( ioff ) ) + ioff = ioff + incx + end do + end if + return + end subroutine stdlib_wlacgv + + !> ZLACN2: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_wlacn2( n, v, x, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(qp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(inout) :: isave(3) + complex(qp), intent(out) :: v(*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, jlast + real(qp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag + ! Executable Statements + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=qp),KIND=qp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_qzsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=qp) / absxi,aimag( x( i ) ) / absxi,KIND=qp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + isave( 2 ) = stdlib_iwmax1( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( isave( 2 ) ) = cone + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_wcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_qzsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=qp) / absxi,aimag( x( i ) ) / absxi,KIND=qp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_iwmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_wcopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_wlacn2 + + !> ZLACON: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_wlacon( n, v, x, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(qp), intent(inout) :: est + ! Array Arguments + complex(qp), intent(out) :: v(n) + complex(qp), intent(inout) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(qp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag + ! Save Statement + save + ! Executable Statements + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=qp),KIND=qp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_qzsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=qp) / absxi,aimag( x( i ) ) / absxi,KIND=qp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + j = stdlib_iwmax1( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( j ) = cone + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_wcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_qzsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=qp) / absxi,aimag( x( i ) ) / absxi,KIND=qp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = j + j = stdlib_iwmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( j ) ) ) .and.( iterest ) then + call stdlib_wcopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_wlacon + + !> ZLACP2: copies all or part of a real two-dimensional matrix A to a + !> complex matrix B. + + pure subroutine stdlib_wlacp2( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_wlacp2 + + !> ZLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_wlacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_wlacpy + + !> ZLACRM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by N and complex; B is N by N and real; + !> C is M by N and complex. + + pure subroutine stdlib_wlacrm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(qp), intent(in) :: b(ldb,*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( a( i, j ),KIND=qp) + end do + end do + l = m*n + 1 + call stdlib_qgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( a( i, j ) ) + end do + end do + call stdlib_qgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=qp),rwork( l+( j-1 )*m+i-1 ),KIND=qp) + + end do + end do + return + end subroutine stdlib_wlacrm + + !> ZLACRT: performs the operation + !> ( c s )( x ) ==> ( x ) + !> ( -s c )( y ) ( y ) + !> where c and s are complex and the vectors x and y are complex. + + pure subroutine stdlib_wlacrt( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + complex(qp), intent(in) :: c, s + ! Array Arguments + complex(qp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(qp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - s*cx( ix ) + cx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + ctemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - s*cx( i ) + cx( i ) = ctemp + end do + return + end subroutine stdlib_wlacrt + + !> ZLADIV: := X / Y, where X and Y are complex. The computation of X / Y + !> will not overflow on an intermediary step unless the results + !> overflows. + + pure complex(qp) function stdlib_wladiv( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: x, y + ! ===================================================================== + ! Local Scalars + real(qp) :: zi, zr + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + call stdlib_qladiv( real( x,KIND=qp), aimag( x ), real( y,KIND=qp), aimag( y ), zr,zi ) + + stdlib_wladiv = cmplx( zr, zi,KIND=qp) + return + end function stdlib_wladiv + + !> Using the divide and conquer method, ZLAED0: computes all eigenvalues + !> of a symmetric tridiagonal matrix which is one diagonal block of + !> those from reducing a dense or band Hermitian matrix and + !> corresponding eigenvectors of the dense or band matrix. + + pure subroutine stdlib_wlaed0( qsiz, n, d, e, q, ldq, qstore, ldqs, rwork,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldqs, n, qsiz + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: q(ldq,*) + complex(qp), intent(out) :: qstore(ldqs,*) + ! ===================================================================== + ! warning: n could be as big as qsiz! + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, ll, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, & + subpbs, tlvls + real(qp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq < 0 .or. icompq > 2 ) then + ! info = -1 + ! else if( ( icompq == 1 ) .and. ( qsiz < max( 0, n ) ) ) + ! $ then + if( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=qp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + k = 1 + do j = submat, iwork( i+1 ) + iwork( indxq+j ) = k + k = k + 1 + end do + end do + ! successively merge eigensystems of adjacent submatrices + ! into eigensystem for the corresponding larger matrix. + ! while ( subpbs > 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. stdlib_wlaed7 handles the case + ! when the eigenvectors of a full or band hermitian matrix (which + ! was reduced to tridiagonal form) are desired. + ! i am free to use q as a valuable working space until loop 150. + call stdlib_wlaed7( matsiz, msd2, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,e( submat+msd2-1 ), iwork( indxq+submat ),rwork( iq ), & + iwork( iqptr ), iwork( iprmpt ),iwork( iperm ), iwork( igivpt ),iwork( igivcl ), & + rwork( igivnm ),q( 1, submat ), rwork( iwrem ),iwork( subpbs+1 ), info ) + if( info>0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + iwork( i / 2+1 ) = iwork( i+2 ) + end do + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + do i = 1, n + j = iwork( indxq+i ) + rwork( i ) = d( j ) + call stdlib_wcopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_qcopy( n, rwork, 1, d, 1 ) + return + end subroutine stdlib_wlaed0 + + !> ZLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense or banded + !> Hermitian matrix that has been reduced to tridiagonal form. + !> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) + !> where Z = Q**Hu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by SLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_wlaed7( n, cutpnt, qsiz, tlvls, curlvl, curpbm, d, q,ldq, rho, indxq, & + qstore, qptr, prmptr, perm,givptr, givcol, givnum, work, rwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(qp), intent(inout) :: d(*), givnum(2,*), qstore(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: q(ldq,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq, iw, iz, k, n1, n2, & + ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq<0 .or. icompq>1 ) then + ! info = -1 + ! else if( n<0 ) then + if( n<0 ) then + info = -1 + else if( min( 1, n )>cutpnt .or. n ZLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_wlaed8( k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda,q2, ldq2, w, & + indxp, indx, indxq, perm, givptr,givcol, givnum, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(qp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(qp), intent(inout) :: d(*), z(*) + real(qp), intent(out) :: dlamda(*), givnum(2,*), w(*) + complex(qp), intent(inout) :: q(ldq,*) + complex(qp), intent(out) :: q2(ldq2,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: mone = -1.0_qp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(qp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( qsizn ) then + info = -8 + else if( ldq2n )go to 90 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_qlapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + call stdlib_wdrot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j ) & + ) ), 1, c, s ) + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 80 continue + if( k2+i<=n ) then + if( d( jlam ) ZLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue W of a complex upper Hessenberg + !> matrix H. + + pure subroutine stdlib_wlaein( rightv, noinit, n, h, ldh, w, v, b, ldb, rwork,eps3, smlnum, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(qp), intent(in) :: eps3, smlnum + complex(qp), intent(in) :: w + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(out) :: b(ldb,*) + complex(qp), intent(in) :: h(ldh,*) + complex(qp), intent(inout) :: v(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: tenth = 1.0e-1_qp + + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, ierr, its, j + real(qp) :: growto, nrmsml, rootn, rtemp, scale, vnorm + complex(qp) :: cdum, ei, ej, temp, x + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=qp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - w*i (except that the subdiagonal elements are not + ! stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - w + end do + if( noinit ) then + ! initialize v. + do i = 1, n + v( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_qznrm2( n, v, 1 ) + call stdlib_wdscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), v, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing czero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( cabs1( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + rtemp = eps3 / ( rootn+one ) + v( 1 ) = eps3 + do i = 2, n + v( i ) = rtemp + end do + v( n-its+1 ) = v( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_iwamax( n, v, 1 ) + call stdlib_wdscal( n, one / cabs1( v( i ) ), v, 1 ) + return + end subroutine stdlib_wlaein + + !> ZLAESY: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> ( ( A, B );( B, C ) ) + !> provided the norm of the matrix of eigenvectors is larger than + !> some threshold value. + !> RT1 is the eigenvalue of larger absolute value, and RT2 of + !> smaller absolute value. If the eigenvectors are computed, then + !> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence + !> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] + !> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] + + pure subroutine stdlib_wlaesy( a, b, c, rt1, rt2, evscal, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: a, b, c + complex(qp), intent(out) :: cs1, evscal, rt1, rt2, sn1 + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1_qp + + + + + + ! Local Scalars + real(qp) :: babs, evnorm, tabs, z + complex(qp) :: s, t, tmp + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! special case: the matrix is actually diagonal. + ! to avoid divide by zero later, we treat this case separately. + if( abs( b )==zero ) then + rt1 = a + rt2 = c + if( abs( rt1 )zero )t = z*sqrt( ( t / z )**2+( b / z )**2 ) + ! compute the two eigenvalues. rt1 and rt2 are exchanged + ! if necessary so that rt1 will have the greater magnitude. + rt1 = s + t + rt2 = s - t + if( abs( rt1 )one ) then + t = tabs*sqrt( ( one / tabs )**2+( sn1 / tabs )**2 ) + else + t = sqrt( cone+sn1*sn1 ) + end if + evnorm = abs( t ) + if( evnorm>=thresh ) then + evscal = cone / t + cs1 = evscal + sn1 = sn1*evscal + else + evscal = zero + end if + end if + return + end subroutine stdlib_wlaesy + + !> ZLAEV2: computes the eigendecomposition of a 2-by-2 Hermitian matrix + !> [ A B ] + !> [ CONJG(B) C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_wlaev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(out) :: cs1, rt1, rt2 + complex(qp), intent(in) :: a, b, c + complex(qp), intent(out) :: sn1 + ! ===================================================================== + + + ! Local Scalars + real(qp) :: t + complex(qp) :: w + ! Intrinsic Functions + intrinsic :: abs,real,conjg + ! Executable Statements + if( abs( b )==zero ) then + w = one + else + w = conjg( b ) / abs( b ) + end if + call stdlib_qlaev2( real( a,KIND=qp), abs( b ), real( c,KIND=qp), rt1, rt2, cs1, t ) + + sn1 = w*t + return + end subroutine stdlib_wlaev2 + + !> ZLAG2C: converts a COMPLEX*16 matrix, SA, to a COMPLEX matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> ZLAG2C checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_wlag2c( m, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + complex(dp), intent(out) :: sa(ldsa,*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: rmax + ! Intrinsic Functions + intrinsic :: real,aimag + ! Executable Statements + rmax = stdlib_dlamch( 'O' ) + do j = 1, n + do i = 1, m + if( ( real( a( i, j ),KIND=qp)<-rmax ) .or.( real( a( i, j ),KIND=qp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) then + info = 1 + go to 30 + end if + sa( i, j ) = a( i, j ) + end do + end do + info = 0 + 30 continue + return + end subroutine stdlib_wlag2c + + !> ZLAGS2: computes 2-by-2 unitary matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**H *A*Q = U**H *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**H*B*Q = V**H *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**H *A*Q = U**H *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**H *B*Q = V**H *( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> where + !> U = ( CSU SNU ), V = ( CSV SNV ), + !> ( -SNU**H CSU ) ( -SNV**H CSV ) + !> Q = ( CSQ SNQ ) + !> ( -SNQ**H CSQ ) + !> The rows of the transformed A and B are parallel. Moreover, if the + !> input 2-by-2 matrix A is not zero, then the transformed (1,1) entry + !> of A is not zero. If the input matrices A and B are both not zero, + !> then the transformed (2,2) element of B is not zero, except when the + !> first rows of input A and B are parallel and the second rows are + !> zero. + + pure subroutine stdlib_wlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(qp), intent(in) :: a1, a3, b1, b3 + real(qp), intent(out) :: csq, csu, csv + complex(qp), intent(in) :: a2, b2 + complex(qp), intent(out) :: snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(qp) :: a, aua11, aua12, aua21, aua22, avb12, avb11, avb21, avb22, csl, csr, d, fb,& + fc, s1, s2, snl, snr, ua11r, ua22r, vb11r, vb22r + complex(qp) :: b, c, d1, r, t, ua11, ua12, ua21, ua22, vb11, vb12, vb21, vb22 + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( t ) = abs( real( t,KIND=qp) ) + abs( aimag( t ) ) + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + fb = abs( b ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(1,d1). + d1 = one + if( fb/=zero )d1 = b / fb + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_qlasv2( a, fb, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,2) element of |u|**h *|a| and |v|**h *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + d1*snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + d1*snr*b3 + aua12 = abs( csl )*abs1( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs1( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**h *a and v**h *b + if( ( abs( ua11r )+abs1( ua12 ) )==zero ) then + call stdlib_wlartg( -cmplx( vb11r,KIND=qp), conjg( vb12 ), csq, snq,r ) + + else if( ( abs( vb11r )+abs1( vb12 ) )==zero ) then + call stdlib_wlartg( -cmplx( ua11r,KIND=qp), conjg( ua12 ), csq, snq,r ) + + else if( aua12 / ( abs( ua11r )+abs1( ua12 ) )<=avb12 /( abs( vb11r )+abs1( vb12 & + ) ) ) then + call stdlib_wlartg( -cmplx( ua11r,KIND=qp), conjg( ua12 ), csq, snq,r ) + + else + call stdlib_wlartg( -cmplx( vb11r,KIND=qp), conjg( vb12 ), csq, snq,r ) + + end if + csu = csl + snu = -d1*snl + csv = csr + snv = -d1*snr + else + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,2) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -conjg( d1 )*snl*a1 + ua22 = -conjg( d1 )*snl*a2 + csl*a3 + vb21 = -conjg( d1 )*snr*b1 + vb22 = -conjg( d1 )*snr*b2 + csr*b3 + aua22 = abs( snl )*abs1( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs1( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua21 )+abs1( ua22 ) )==zero ) then + call stdlib_wlartg( -conjg( vb21 ), conjg( vb22 ), csq, snq,r ) + else if( ( abs1( vb21 )+abs( vb22 ) )==zero ) then + call stdlib_wlartg( -conjg( ua21 ), conjg( ua22 ), csq, snq,r ) + else if( aua22 / ( abs1( ua21 )+abs1( ua22 ) )<=avb22 /( abs1( vb21 )+abs1( vb22 & + ) ) ) then + call stdlib_wlartg( -conjg( ua21 ), conjg( ua22 ), csq, snq,r ) + else + call stdlib_wlartg( -conjg( vb21 ), conjg( vb22 ), csq, snq,r ) + end if + csu = snl + snu = d1*csl + csv = snr + snv = d1*csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + fc = abs( c ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(d1,1). + d1 = one + if( fc/=zero )d1 = c / fc + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_qlasv2( a, fc, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,1) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -d1*snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -d1*snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs1( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs1( b2 ) + ! zero (2,1) elements of u**h *a and v**h *b. + if( ( abs1( ua21 )+abs( ua22r ) )==zero ) then + call stdlib_wlartg( cmplx( vb22r,KIND=qp), vb21, csq, snq, r ) + else if( ( abs1( vb21 )+abs( vb22r ) )==zero ) then + call stdlib_wlartg( cmplx( ua22r,KIND=qp), ua21, csq, snq, r ) + else if( aua21 / ( abs1( ua21 )+abs( ua22r ) )<=avb21 /( abs1( vb21 )+abs( vb22r & + ) ) ) then + call stdlib_wlartg( cmplx( ua22r,KIND=qp), ua21, csq, snq, r ) + else + call stdlib_wlartg( cmplx( vb22r,KIND=qp), vb21, csq, snq, r ) + end if + csu = csr + snu = -conjg( d1 )*snr + csv = csl + snv = -conjg( d1 )*snl + else + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,1) element of |u|**h *|a| and |v|**h *|b|. + ua11 = csr*a1 + conjg( d1 )*snr*a2 + ua12 = conjg( d1 )*snr*a3 + vb11 = csl*b1 + conjg( d1 )*snl*b2 + vb12 = conjg( d1 )*snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs1( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs1( b2 ) + ! zero (1,1) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua11 )+abs1( ua12 ) )==zero ) then + call stdlib_wlartg( vb12, vb11, csq, snq, r ) + else if( ( abs1( vb11 )+abs1( vb12 ) )==zero ) then + call stdlib_wlartg( ua12, ua11, csq, snq, r ) + else if( aua11 / ( abs1( ua11 )+abs1( ua12 ) )<=avb11 /( abs1( vb11 )+abs1( vb12 & + ) ) ) then + call stdlib_wlartg( ua12, ua11, csq, snq, r ) + else + call stdlib_wlartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = conjg( d1 )*csr + csv = snl + snv = conjg( d1 )*csl + end if + end if + return + end subroutine stdlib_wlags2 + + !> ZLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_wlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(in) :: alpha, beta + ! Array Arguments + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b + a**t * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b + a**h * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) +conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) + conjg( du( n-1 ) )*x( n-1, j ) + conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + conjg( du( i-1 ) )*x( i-1, j ) + conjg( d( i ) )& + *x( i, j ) + conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b - a**t *x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b - a**h *x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) -conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) - conjg( du( n-1 ) )*x( n-1, j ) - conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - conjg( du( i-1 ) )*x( i-1, j ) - conjg( d( i ) )& + *x( i, j ) - conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_wlagtm + + !> ZLAHEF: computes a partial factorization of a complex Hermitian + !> matrix A using the Bunch-Kaufman diagonal pivoting method. The + !> partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlahef( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(qp) :: absakk, alpha, colmax, r1, rowmax, t + complex(qp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=qp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column kw-1 of w and update it + call stdlib_wcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + w( imax, kw-1 ) = real( a( imax, imax ),KIND=qp) + call stdlib_wcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_wlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + jmax = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + ! case(2) + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, kw-1 ),KIND=qp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_wcopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + ! case(4) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=qp) + call stdlib_wcopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + call stdlib_wlacgv( kk-1-kp, a( kp, kp+1 ), lda ) + if( kp>1 )call stdlib_wcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(4)) + r1 = one / real( a( k, k ),KIND=qp) + call stdlib_wdscal( k-1, r1, a( 1, k ), 1 ) + ! (2) conjugate column w(kw) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( conj(d21)*( d11 ) d21*( -1 ) ) + ! ( ( -1 ) ( d22 ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = t/d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0, since in 2x2 pivot case(4) + ! |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=qp)-one ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = conjg( d21 )*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_wlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + end do + ! update the rectangular superdiagonal block + call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_wswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + kstep = 1 + ! copy column k of a to column k of w and update it + w( k, k ) = real( a( k, k ),KIND=qp) + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column k+1 of w and update it + call stdlib_wcopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_wlacgv( imax-k, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( a( imax, imax ),KIND=qp) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, k+1 ),KIND=qp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_wcopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + ! case(4) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=qp) + call stdlib_wcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_wlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_wswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=qp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_wcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_wswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_wlahef + + !> DLAHEF_AA factorizes a panel of a complex hermitian matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_wlahef_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), h(ldh,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(qp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_whetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j, j:n) - h(j:n, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:n, j) has been initialized to be a(j, j:n) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_wlacgv( j-k1, a( 1, j ), 1 ) + call stdlib_wgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + call stdlib_wlacgv( j-k1, a( 1, j ), 1 ) + end if + ! copy h(i:n, i) into work + call stdlib_wcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:n) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:n) stores u(j-1, j:n) + alpha = -conjg( a( k-1, j ) ) + call stdlib_waxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = real( work( 1 ),KIND=qp) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_waxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_iwamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:n) with a(i1+1:n, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_wswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + call stdlib_wlacgv( i2-i1, a( j1+i1-1, i1+1 ), lda ) + call stdlib_wlacgv( i2-i1-1, a( j1+i1, i2 ), 1 ) + ! swap a(i1, i2+1:n) with a(i2, i2+1:n) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_wswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_whetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j:n, j) - h(j:n, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:n, j) has been initialized to be a(j:n, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_wlacgv( j-k1, a( j, 1 ), lda ) + call stdlib_wgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + call stdlib_wlacgv( j-k1, a( j, 1 ), lda ) + end if + ! copy h(j:n, j) into work + call stdlib_wcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:n, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -conjg( a( j, k-1 ) ) + call stdlib_waxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = real( work( 1 ),KIND=qp) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_waxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_iwamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:n, i1) with a(i2, i1+1:n) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_wswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + call stdlib_wlacgv( i2-i1, a( i1+1, j1+i1-1 ), 1 ) + call stdlib_wlacgv( i2-i1-1, a( i2, j1+i1 ), lda ) + ! swap a(i2+1:n, i1) with a(i2+1:n, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_wswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j ZLAHEF_RK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLAHEF_RK is an auxiliary routine called by ZHETRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlahef_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: w(ldw,*), e(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, k, kk, kkw, kp, kstep, kw, p + real(qp) :: absakk, alpha, colmax, dtemp, r1, rowmax, t, sfmin + complex(qp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_qlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_wcopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=qp) + if( k1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=qp) + if( k>1 )call stdlib_wcopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_wcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=qp) + call stdlib_wcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_wlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=qp) )1 )call stdlib_wcopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_wcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=qp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_wdscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=qp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_wlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 (note that conjg(w) is actually stored) + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=qp) + if( k1 ) then + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=qp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=qp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_iwamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=qp) )1 )call stdlib_wswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_wswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=qp) + call stdlib_wcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_wlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_wswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=qp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_wcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_wdscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_wlacgv( n-k, w( k+1, k ), 1 ) + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k ZLAHEF_ROOK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting + !> method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> ZLAHEF_ROOK is an auxiliary routine called by ZHETRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlahef_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, jp1, jp2, k, kk, kkw, kp, kstep, kw, & + p + real(qp) :: absakk, alpha, colmax, dtemp, r1, rowmax, t, sfmin + complex(qp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_qlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_wcopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=qp) + if( k1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=qp) + if( k>1 )call stdlib_wcopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_wcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=qp) + call stdlib_wcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_wlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=qp) )1 )call stdlib_wcopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_wcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=qp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_wdscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=qp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_wlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_wlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=qp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in of rows in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows j and jp2 + ! (or j and jp2, and j+1 and jp1) at each step j + kstep = 1 + jp1 = 1 + ! (here, j is a diagonal index) + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + ! (here, j is a diagonal index) + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_wswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = jj + 1 + if( kstep==2 .and. jp1/=jj .and. j<=n )call stdlib_wswap( n-j+1, a( jp1, j ), & + lda, a( jj, j ), lda ) + if( j=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=qp) + if( k1 ) then + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=qp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=qp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=qp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_iwamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=qp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=qp) )1 )call stdlib_wswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_wswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=qp) + call stdlib_wcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_wlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_wswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=qp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_wcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_wdscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_wlacgv( n-k, w( k+1, k ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k=1 )call stdlib_wswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = jj -1 + if( kstep==2 .and. jp1/=jj .and. j>=1 )call stdlib_wswap( j, a( jp1, 1 ), lda, a(& + jj, 1 ), lda ) + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_wlahef_rook + + !> ZLAHQR: is an auxiliary routine called by CHSEQR to update the + !> eigenvalues and Schur decomposition already computed by CHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_wlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & + ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: w(*) + ! ========================================================= + ! Parameters + real(qp), parameter :: rzero = 0.0_qp + real(qp), parameter :: rone = 1.0_qp + real(qp), parameter :: dat1 = 3.0_qp/4.0_qp + integer(ilp), parameter :: kexsh = 10 + + + + + ! Local Scalars + complex(qp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y + real(qp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & + ulp + integer(ilp) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl + ! Local Arrays + complex(qp) :: v(2) + ! Statement Functions + real(qp) :: cabs1 + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + w( ilo ) = h( ilo, ilo ) + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = czero + h( j+3, j ) = czero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero + ! ==== ensure that subdiagonal entries are real ==== + if( wantt ) then + jlo = 1 + jhi = n + else + jlo = ilo + jhi = ihi + end if + do i = ilo + 1, ihi + if( aimag( h( i, i-1 ) )/=rzero ) then + ! ==== the following redundant normalization + ! . avoids problems with both gradual and + ! . sudden underflow in abs(h(i,i-1)) ==== + sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) + sc = conjg( sc ) / abs( sc ) + h( i, i-1 ) = abs( h( i, i-1 ) ) + call stdlib_wscal( jhi-i+1, sc, h( i, i ), ldh ) + call stdlib_wscal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1 ) + if( wantz )call stdlib_wscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1 ) + end if + end do + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=qp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo, or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 30 continue + if( i=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=qp) ) + if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=qp) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some examples. ==== + if( abs( real( h( k, k-1 ),KIND=qp) )<=ulp*tst ) then + ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 + end if + end do + 50 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = czero + end if + ! exit from loop if a submatrix of order 1 has split off. + if( l>=i )go to 140 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( i, i-1 ),KIND=qp) ) + t = s + h( i, i ) + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( l+1, l ),KIND=qp) ) + t = s + h( l, l ) + else + ! wilkinson's shift. + t = h( i, i ) + u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) + s = cabs1( u ) + if( s/=rzero ) then + x = half*( h( i-1, i-1 )-t ) + sx = cabs1( x ) + s = max( s, cabs1( x ) ) + y = s*sqrt( ( x / s )**2+( u / s )**2 ) + if( sx>rzero ) then + if( real( x / sx,KIND=qp)*real( y,KIND=qp)+aimag( x / sx )*aimag( y )& + m )call stdlib_wcopy( 2, h( k, k-1 ), 1, v, 1 ) + call stdlib_wlarfg( 2, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = czero + end if + v2 = v( 2 ) + t2 = real( t1*v2,KIND=qp) + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum + h( k+1, j ) = h( k+1, j ) - sum*v2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+2,i). + do j = i1, min( k+2, i ) + sum = t1*h( j, k ) + t2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum + h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = t1*z( j, k ) + t2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum + z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) + end do + end if + if( k==m .and. m>l ) then + ! if the qr step was started at row m > l because two + ! consecutive small subdiagonals were found, then extra + ! scaling must be performed to ensure that h(m,m-1) remains + ! real. + temp = cone - t1 + temp = temp / abs( temp ) + h( m+1, m ) = h( m+1, m )*conjg( temp ) + if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp + do j = m, i + if( j/=m+1 ) then + if( i2>j )call stdlib_wscal( i2-j, temp, h( j, j+1 ), ldh ) + call stdlib_wscal( j-i1, conjg( temp ), h( i1, j ), 1 ) + if( wantz ) then + call stdlib_wscal( nz, conjg( temp ), z( iloz, j ),1 ) + end if + end if + end do + end if + end do loop_120 + ! ensure that h(i,i-1) is real. + temp = h( i, i-1 ) + if( aimag( temp )/=rzero ) then + rtemp = abs( temp ) + h( i, i-1 ) = rtemp + temp = temp / rtemp + if( i2>i )call stdlib_wscal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) + call stdlib_wscal( i-i1, temp, h( i1, i ), 1 ) + if( wantz ) then + call stdlib_wscal( nz, temp, z( iloz, i ), 1 ) + end if + end if + end do loop_130 + ! failure to converge in remaining number of iterations + info = i + return + 140 continue + ! h(i,i-1) is negligible: cone eigenvalue has converged. + w( i ) = h( i, i ) + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 30 + 150 continue + return + end subroutine stdlib_wlahqr + + !> ZLAHR2: reduces the first NB columns of A complex general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an unitary similarity transformation + !> Q**H * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*V**H, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by ZGEHRD. + + pure subroutine stdlib_wlahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**h + call stdlib_wlacgv( i-1, a( k+i-1, 1 ), lda ) + call stdlib_wgemv( 'NO TRANSPOSE', n-k, i-1, -cone, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, cone, a( k+1, i ), 1 ) + call stdlib_wlacgv( i-1, a( k+i-1, 1 ), lda ) + ! apply i - v * t**h * v**h to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**h * b1 + call stdlib_wcopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_wtrmv( 'LOWER', 'CONJUGATE TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, & + t( 1, nb ), 1 ) + ! w := w + v2**h * b2 + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ),lda, a( & + k+i, i ), 1, cone, t( 1, nb ), 1 ) + ! w := t**h * w + call stdlib_wtrmv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, & + nb ), 1 ) + ! b2 := b2 - v2*w + call stdlib_wgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -cone,a( k+i, 1 ),lda, t( 1, nb & + ), 1, cone, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_wtrmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_waxpy( i-1, -cone, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_wlarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = cone + ! compute y(k+1:n,i) + call stdlib_wgemv( 'NO TRANSPOSE', n-k, n-k-i+1,cone, a( k+1, i+1 ),lda, a( k+i, i )& + , 1, czero, y( k+1, i ), 1 ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ), lda,a( k+& + i, i ), 1, czero, t( 1, i ), 1 ) + call stdlib_wgemv( 'NO TRANSPOSE', n-k, i-1, -cone,y( k+1, 1 ), ldy,t( 1, i ), 1, & + cone, y( k+1, i ), 1 ) + call stdlib_wscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_wscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_wtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_wlacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,cone, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, cone,a( 1,& + 2+nb ), lda, a( k+1+nb, 1 ), lda, cone, y,ldy ) + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,cone, t, ldt, y, & + ldy ) + return + end subroutine stdlib_wlahr2 + + !> ZLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then ZLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**H gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**H and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] + !> [ conjg(gamma) ] + !> where alpha = x**H * w. + + pure subroutine stdlib_wlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(qp), intent(in) :: sest + real(qp), intent(out) :: sestpr + complex(qp), intent(out) :: c, s + complex(qp), intent(in) :: gamma + ! Array Arguments + complex(qp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(qp) :: absalp, absest, absgam, b, eps, norma, s1, s2, scl, t, test, tmp, zeta1, & + zeta2 + complex(qp) :: alpha, cosine, sine + ! Intrinsic Functions + intrinsic :: abs,conjg,max,sqrt + ! Executable Statements + eps = stdlib_qlamch( 'EPSILON' ) + alpha = stdlib_wdotc( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=qp) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = s2*scl + s = ( alpha / s2 ) / scl + c = ( gamma / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = s1*scl + s = ( alpha / s1 ) / scl + c = ( gamma / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = real( c / ( b+sqrt( b*b+c ) ),KIND=qp) + else + t = real( sqrt( b*b+c ) - b,KIND=qp) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=qp) + + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -conjg( gamma ) + cosine = conjg( alpha ) + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=qp) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / scl ) + s = -( conjg( gamma ) / s2 ) / scl + c = ( conjg( alpha ) / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = absest / scl + s = -( conjg( gamma ) / s1 ) / scl + c = ( conjg( alpha ) / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + norma = max( one+zeta1*zeta1+zeta1*zeta2,zeta1*zeta2+zeta2*zeta2 ) + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = real( c / ( b+sqrt( abs( b*b-c ) ) ),KIND=qp) + sine = ( alpha / absest ) / ( one-t ) + cosine = -( gamma / absest ) / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = -c / ( b+sqrt( b*b+c ) ) + else + t = b - sqrt( b*b+c ) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=qp) + + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_wlaic1 + + !> ZLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_wlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(qp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(qp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jcol, jrow, m, n, nlp1 + real(qp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb ZLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by ZLALSA. + + pure subroutine stdlib_wlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, rwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, jcol, jimag, jreal, jrow, lf, ll, lvl, lvl2, & + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n ZLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_wlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, rwork, & + iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(qp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + irwb, irwib, irwrb, irwu, irwvt, irwwrk, iwk, j, jcol, jimag, jreal, jrow, k, nlvl, & + nm1, nrwork, nsize, nsub, perm, poles, s, sizei, smlszp, sqre, st, st1, u, vt, & + z + real(qp) :: cs, eps, orgnrm, rcnd, r, sn, tol + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_wlaset( 'A', 1, nrhs, czero, czero, b, ldb ) + else + rank = 1 + call stdlib_wlascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_qlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_wdrot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + rwork( i*2-1 ) = cs + rwork( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = rwork( j*2-1 ) + sn = rwork( j*2 ) + call stdlib_wdrot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_qlanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_wlaset( 'A', n, nrhs, czero, czero, b, ldb ) + return + end if + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + irwu = 1 + irwvt = irwu + n*n + irwwrk = irwvt + n*n + irwrb = irwwrk + irwib = irwrb + n*nrhs + irwb = irwib + n*nrhs + call stdlib_qlaset( 'A', n, n, zero, one, rwork( irwu ), n ) + call stdlib_qlaset( 'A', n, n, zero, one, rwork( irwvt ), n ) + call stdlib_qlasdq( 'U', 0, n, n, n, 0, d, e, rwork( irwvt ), n,rwork( irwu ), n, & + rwork( irwwrk ), 1,rwork( irwwrk ), info ) + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_qlasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=qp) + end do + end do + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=qp) + end do + end do + tol = rcnd*abs( d( stdlib_iqamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_wlaset( 'A', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + else + call stdlib_wlascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + ! since b is complex, the following call to stdlib_qgemm is performed + ! in two steps (real and imaginary parts). that is for v * b + ! (in the real version of the code v**h is stored in work). + ! call stdlib_qgemm( 't', 'n', n, nrhs, n, one, work, n, b, ldb, zero, + ! $ work( nwork ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=qp) + end do + end do + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_qgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=qp) + end do + end do + ! unscale. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_qlasrt( 'D', n, d, info ) + call stdlib_wlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=qp) / real( smlsiz+1,KIND=qp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + nrwork = givnum + 2*nlvl*n + bx = 1 + irwrb = nrwork + irwib = irwrb + smlsiz*nrhs + irwb = irwib + smlsiz*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_wcopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_wcopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_qlasdq. + call stdlib_qlaset( 'A', nsize, nsize, zero, one,rwork( vt+st1 ), n ) + call stdlib_qlaset( 'A', nsize, nsize, zero, one,rwork( u+st1 ), n ) + call stdlib_qlasdq( 'U', 0, nsize, nsize, nsize, 0, d( st ),e( st ), rwork( & + vt+st1 ), n, rwork( u+st1 ),n, rwork( nrwork ), 1, rwork( nrwork ),info ) + + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_qlasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=qp) + end do + end do + call stdlib_qgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwrb ), nsize ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_qgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=qp) + end do + end do + call stdlib_wlacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_qlasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), rwork( u+& + st1 ), n, rwork( vt+st1 ),iwork( k+st1 ), rwork( difl+st1 ),rwork( difr+st1 ),& + rwork( z+st1 ),rwork( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), & + n, iwork( perm+st1 ),rwork( givnum+st1 ), rwork( c+st1 ),rwork( s+st1 ), & + rwork( nrwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_wlalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), & + rwork( difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), & + iwork( givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ),& + rwork( s+st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_240 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_iqamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_wlaset( 'A', 1, nrhs, czero, czero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_wlascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + loop_320: do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_wcopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + ! since b and bx are complex, the following call to stdlib_qgemm + ! is performed in two steps (real and imaginary parts). + ! call stdlib_qgemm( 't', 'n', nsize, nrhs, nsize, one, + ! $ rwork( vt+st1 ), n, rwork( bxst ), n, zero, + ! $ b( st, 1 ), ldb ) + j = bxst - n - 1 + jreal = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jreal = jreal + 1 + rwork( jreal ) = real( work( j+jrow ),KIND=qp) + end do + end do + call stdlib_qgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwrb ), nsize ) + j = bxst - n - 1 + jimag = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jimag = jimag + 1 + rwork( jimag ) = aimag( work( j+jrow ) ) + end do + end do + call stdlib_qgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=qp) + end do + end do + else + call stdlib_wlalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), rwork( & + difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ), rwork( s+& + st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do loop_320 + ! unscale and sort the singular values. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_qlasrt( 'D', n, d, info ) + call stdlib_wlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_wlalsd + + !> ZLAMSWLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (ZLASWLQ) + + pure subroutine stdlib_wlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), t(ldt,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_wgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_wtpmlqt('L','C',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_wtpmlqt('L','C',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_wgemlqt('L','C',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_wgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_wtpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_wtpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_wtpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_wtpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_wgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ii=n-kk+1 + call stdlib_wgemlqt('R','C',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + ctr = 1 + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_wtpmlqt('R','C',m , nb-k, k, 0,mb, a(1,i), lda,t(1,ctr *k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_wtpmlqt('R','C',m , kk, k, 0,mb, a(1,ii), lda,t(1, ctr * k + 1),ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_wlamswlq + + !> ZLAMTSQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (ZLATSQR) + + pure subroutine stdlib_wlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), t(ldt,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = m * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_wgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_wtpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1, ctr * k + 1),ldt ,& + c(1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_wtpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, & + c(1,1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_wgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_wgemqrt('L','C',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_wtpmqrt('L','C',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_wtpmqrt('L','C',kk , n, k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_wtpmqrt('R','C',m , kk, k, 0, nb, a(ii,1), lda,t(1,ctr * k + 1), ldt,& + c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_wtpmqrt('R','C',m , mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_wgemqrt('R','C',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_wgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_wtpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_wtpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_wlamtsqr + + !> ZLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(qp) function stdlib_wlangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(qp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value ZLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex matrix A. + + real(qp) function stdlib_wlange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value ZLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex tridiagonal matrix A. + + pure real(qp) function stdlib_wlangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_wlassq( n-1, dl, 1, scale, sum ) + call stdlib_wlassq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_wlangt = anorm + return + end function stdlib_wlangt + + !> ZLANHB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n hermitian band matrix A, with k super-diagonals. + + real(qp) function stdlib_wlanhb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + sum = abs( real( ab( k+1, j ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( ab( 1, j ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( ab( k+1, j ),KIND=qp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ab( 1, j ),KIND=qp) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_wlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + do j = 1, n + if( real( ab( l, j ),KIND=qp)/=zero ) then + absa = abs( real( ab( l, j ),KIND=qp) ) + if( scale ZLANHE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A. + + real(qp) function stdlib_wlanhe( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j - 1 + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + sum = abs( real( a( j, j ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( a( j, j ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + do i = j + 1, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( a( j, j ),KIND=qp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( a( j, j ),KIND=qp) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_wlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + do i = 1, n + if( real( a( i, i ),KIND=qp)/=zero ) then + absa = abs( real( a( i, i ),KIND=qp) ) + if( scale ZLANHF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian matrix A in RFP format. + + real(qp) function stdlib_wlanhf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: work(0:*) + complex(qp), intent(in) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(qp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_wlanhf = zero + return + else if( n==1 ) then + stdlib_wlanhf = abs(real(a(0),KIND=qp)) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='c' or 'c' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'C' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(0,0) + temp = abs( real( a( j+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j - 1 + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = k + j - 1 + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = i + 1 + ! =k+j; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = k + j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n + if( ilu==1 ) then + ! uplo ='l' + do j = 0, k - 2 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j + 1 + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + j = k - 1 + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do j = k, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + j = k - 1 + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + do j = k, n - 1 + do i = 0, j - k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j - k + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j - k + 1 + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j - k + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) + temp = abs( real( a( j+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + temp = abs( real( a( j+1+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j + 1 + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = k + j + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = i + 1 + ! =k+j+1; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = k + j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = n + ! -> u(k-1,k-1) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n+1 + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) at a(0,0) + temp = abs( real( a( j+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j - 1 + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + j = k + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do j = k + 1, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + j = k + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + do j = k + 1, n - 1 + do i = 0, j - k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = j - k - 1 + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + i = j - k + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + do i = j - k + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end do + j = n + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + i = k - 1 + ! u(k,k) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=qp) ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end if + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + if( ifm==1 ) then + ! a is 'n' + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( real( a( 0+j*lda ),KIND=qp) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=qp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( real( a( 0+j*lda ),KIND=qp) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( real( a( 0 ),KIND=qp) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=qp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=qp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_qisnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_wlassq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_wlassq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + l = k - 1 + ! -> u(k,k) at a(k-1,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=qp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,1) + do i = 1, k - 1 + aa = real( a( l ),KIND=qp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(0,k-1) + aa = real( a( l ),KIND=qp) + ! u(k-1,k-1) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k) + do j = k, n - 1 + aa = real( a( l ),KIND=qp) + ! -> u(j-k,j-k) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale l(0,0) at a(0,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=qp) + ! l(i,i) + if( aa/=zero ) then + if( scale k-1 + (k-1)*lda or l(k-1,k-1) at a(k-1,k-1) + aa = real( a( l ),KIND=qp) + ! l(k-1,k-1) at a(k-1,k-1) + if( aa/=zero ) then + if( scale u(k,k) at a(k,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=qp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=qp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k,k) at a(0,k) + aa = real( a( l ),KIND=qp) + ! u(k,k) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k+1) + do j = k + 1, n - 1 + aa = real( a( l ),KIND=qp) + ! -> u(j-k-1,j-k-1) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(k-1,n) + aa = real( a( l ),KIND=qp) + ! u(k,k) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + aa = real( a( l ),KIND=qp) + ! l(k,k) at a(0,0) + if( aa/=zero ) then + if( scale l(0,0) at a(0,1) + do i = 0, k - 2 + aa = real( a( l ),KIND=qp) + ! l(i,i) + if( aa/=zero ) then + if( scale k - 1 + k*lda or l(k-1,k-1) at a(k-1,k) + aa = real( a( l ),KIND=qp) + ! l(k-1,k-1) at a(k-1,k) + if( aa/=zero ) then + if( scale ZLANHP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A, supplied in packed form. + + real(qp) function stdlib_wlanhp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 0 + do j = 1, n + do i = k + 1, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + sum = abs( real( ap( k ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + k = 1 + do j = 1, n + sum = abs( real( ap( k ),KIND=qp) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( real( ap( k ),KIND=qp) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ap( k ),KIND=qp) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_wlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=qp)/=zero ) then + absa = abs( real( ap( k ),KIND=qp) ) + if( scale ZLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(qp) function stdlib_wlanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_wlassq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_wlanhs = value + return + end function stdlib_wlanhs + + !> ZLANHT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian tridiagonal matrix A. + + pure real(qp) function stdlib_wlanht( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(in) :: d(*) + complex(qp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_qisnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_wlassq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_qlassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_wlanht = anorm + return + end function stdlib_wlanht + + !> ZLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(qp) function stdlib_wlansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_wlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_wlassq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_wlansb = value + return + end function stdlib_wlansb + + !> ZLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A, supplied in packed form. + + real(qp) function stdlib_wlansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,aimag,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_wlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=qp)/=zero ) then + absa = abs( real( ap( k ),KIND=qp) ) + if( scale ZLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A. + + real(qp) function stdlib_wlansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_wlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_wlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_wlassq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_wlansy = value + return + end function stdlib_wlansy + + !> ZLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(qp) function stdlib_wlantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_wlassq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_wlassq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_wlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_wlassq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_wlantb = value + return + end function stdlib_wlantb + + !> ZLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(qp) function stdlib_wlantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_wlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_wlassq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_wlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_wlassq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_wlantp = value + return + end function stdlib_wlantp + + !> ZLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(qp) function stdlib_wlantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(out) :: work(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(qp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_qisnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_wlassq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_wlassq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_wlassq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_wlassq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_wlantr = value + return + end function stdlib_wlantr + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_wlapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(qp), intent(out) :: ssmin + ! Array Arguments + complex(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + real(qp) :: ssmax + complex(qp) :: a11, a12, a22, c, tau + ! Intrinsic Functions + intrinsic :: abs,conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_wlarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = cone + c = -conjg( tau )*stdlib_wdotc( n, x, incx, y, incy ) + call stdlib_waxpy( n, c, x, incx, y, incy ) + call stdlib_wlarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_qlas2( abs( a11 ), abs( a12 ), abs( a22 ), ssmin, ssmax ) + return + end subroutine stdlib_wlapll + + !> ZLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_wlapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + complex(qp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_wlapmr + + !> ZLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_wlapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, in, j + complex(qp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_wlapmt + + !> ZLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_wlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(qp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: c(*), r(*) + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_wlaqgb + + !> ZLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_wlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(qp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(qp), intent(in) :: c(*), r(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_wlaqge + + !> ZLAQHB: equilibrates a Hermitian band matrix A + !> using the scaling factors in the vector S. + + pure subroutine stdlib_wlaqhb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(out) :: s(*) + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j - 1 + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + ab( kd+1, j ) = cj*cj*real( ab( kd+1, j ),KIND=qp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + ab( 1, j ) = cj*cj*real( ab( 1, j ),KIND=qp) + do i = j + 1, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqhb + + !> ZLAQHE: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_wlaqhe( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + a( i, j ) = cj*s( i )*a( i, j ) + end do + a( j, j ) = cj*cj*real( a( j, j ),KIND=qp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + a( j, j ) = cj*cj*real( a( j, j ),KIND=qp) + do i = j + 1, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqhe + + !> ZLAQHP: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_wlaqhp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + ap( jc+j-1 ) = cj*cj*real( ap( jc+j-1 ),KIND=qp) + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + ap( jc ) = cj*cj*real( ap( jc ),KIND=qp) + do i = j + 1, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqhp + + !> ZLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_wlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: vn1(*), vn2(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(qp) :: temp, temp2, tol3z + complex(qp) :: aii + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_qlamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_iqamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_wswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi ZLAQPS: computes a step of QR factorization with column pivoting + !> of a complex M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_wlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(qp), intent(inout) :: vn1(*), vn2(*) + complex(qp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*) + complex(qp), intent(out) :: tau(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(qp) :: temp, temp2, tol3z + complex(qp) :: akk + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min,nint,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_qlamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + call stdlib_wgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1 ),lda, f( k, 1 ),& + ldf, cone, a( rk, k ), 1 ) + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1 ), lda,& + a( rk, k ), 1, czero,auxv( 1 ), 1 ) + call stdlib_wgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1, 1 ), ldf,auxv( 1 ), 1, & + cone, f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_qznrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_dnrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_qlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 60 + end if + return + end subroutine stdlib_wlaqps + + !> ZLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_wlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(qp), parameter :: wilk1 = 0.75_qp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_wlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(qp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(qp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(qp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,max,min,mod,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_wlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_wlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_wlaqr3 ==== + call stdlib_wlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_wlaqr5, stdlib_wlaqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + return + end if + ! ==== stdlib_wlahqr/stdlib_wlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_wlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_wlaqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_wlaqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_wlaqr4 or + ! . stdlib_wlahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_wlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_wlaqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, work, lwork, inf ) + else + call stdlib_wlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + end subroutine stdlib_wlaqr0 + + !> Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - s1*I)*(H - s2*I) + !> scaling to avoid overflows and most underflows. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_wlaqr1( n, h, ldh, s1, s2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(qp), intent(in) :: s1, s2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + complex(qp), intent(in) :: h(ldh,*) + complex(qp), intent(out) :: v(*) + ! ================================================================ + ! Parameters + real(qp), parameter :: rzero = 0.0_qp + + + ! Local Scalars + complex(qp) :: cdum, h21s, h31s + real(qp) :: s + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) + if( s==rzero ) then + v( 1 ) = czero + v( 2 ) = czero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + end if + else + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) +cabs1( h( 3, 1 ) ) + if( s==czero ) then + v( 1 ) = czero + v( 2 ) = czero + v( 3 ) = czero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) +h( 1, 2 )*h21s + h( 1, 3 )& + *h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-s1-s2 ) + h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_wlaqr1 + + !> ZLAQR2: is identical to ZLAQR3 except that it avoids + !> recursion by calling ZLAHQR instead of ZLAQR4. + !> Aggressive early deflation: + !> ZLAQR2 accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_wlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(qp), parameter :: rzero = 0.0_qp + real(qp), parameter :: rone = 1.0_qp + + + ! Local Scalars + complex(qp) :: beta, cdum, s, tau + real(qp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwkopt + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_wgehrd ==== + call stdlib_wgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_wunmhr ==== + call stdlib_wunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_wlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_wcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_wlaset( 'A', jw, jw, czero, cone, v, ldv ) + call stdlib_wlahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_wtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_wtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_wtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_wcopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_wlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_wlaset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_wlarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_wlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_wlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_wgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_wlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_wcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_wunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_wgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_wlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_wgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_wlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_wgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_wlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + end subroutine stdlib_wlaqr2 + + !> Aggressive early deflation: + !> ZLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_wlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(qp), parameter :: rzero = 0.0_qp + real(qp), parameter :: rone = 1.0_qp + + + ! Local Scalars + complex(qp) :: beta, cdum, s, tau + real(qp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwk3, lwkopt, nmin + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_wgehrd ==== + call stdlib_wgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_wunmhr ==== + call stdlib_wunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_wlaqr4 ==== + call stdlib_wlaqr4( .true., .true., jw, 1, jw, t, ldt, sh, 1, jw, v,ldv, work, -1, & + infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_wlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_wcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_wlaset( 'A', jw, jw, czero, cone, v, ldv ) + nmin = stdlib_ilaenv( 12, 'ZLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_wlaqr4( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + work, lwork, infqr ) + else + call stdlib_wlahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + end if + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_wtrexc can not fail in this case.) ==== + ifst = ns + call stdlib_wtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_wtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_wcopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_wlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_wlaset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_wlarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_wlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_wlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_wgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_wlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_wcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_wunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_wgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_wlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_wgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_wlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_wgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_wlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + end subroutine stdlib_wlaqr3 + + !> ZLAQR4: implements one level of recursion for ZLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by ZLAQR0 and, for large enough + !> deflation window size, it may be called by ZLAQR3. This + !> subroutine is identical to ZLAQR0 except that it calls ZLAQR2 + !> instead of ZLAQR3. + !> ZLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_wlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(qp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(qp), parameter :: wilk1 = 0.75_qp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_wlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(qp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(qp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(qp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,max,min,mod,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_wlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_wlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_wlaqr2 ==== + call stdlib_wlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_wlaqr5, stdlib_wlaqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + return + end if + ! ==== stdlib_wlahqr/stdlib_wlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_wlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_wlaqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_wlaqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_wlahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_wlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_wlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( ks )& + , 1, 1, zdum,1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=qp) + end subroutine stdlib_wlaqr4 + + !> ZLAQR5:, called by ZLAQR0, performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_wlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & + ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(qp), intent(inout) :: h(ldh,*), s(*), z(ldz,*) + complex(qp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(qp), parameter :: rzero = 0.0_qp + real(qp), parameter :: rone = 1.0_qp + + + ! Local Scalars + complex(qp) :: alpha, beta, cdum, refsum + real(qp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp + integer(ilp) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& + m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,mod + ! Local Arrays + complex(qp) :: vt(3) + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by cone. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = czero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_wlaset( 'ALL', kdu, kdu, czero, cone, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop ) then + if( h( k+1, k )/=czero ) then + tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) + if( tst1==rzero ) then + if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & + k+1, k ) = czero + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_wlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),s( 2*m ), v( 1, m )& + ) + alpha = v( 1, m ) + call stdlib_wlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually czero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*conjg( v( 2, m ) ) + h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3, m ) ) + ! ==== calculate reflection to move + ! . mth bulge cone step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_wlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & + ) then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new cone. ==== + call stdlib_wlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),s( 2*m ), vt ) + + alpha = vt( 1 ) + call stdlib_wlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = conjg( vt( 1 ) )*( h( k+1, k )+conjg( vt( 2 ) )*h( k+2, k ) ) + + if( cabs1( h( k+2, k )-refsum*vt( 2 ) )+cabs1( refsum*vt( 3 ) )>ulp*( & + cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & + then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old cone with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new cone. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = czero + h( k+3, k ) = czero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2, m ) ) + h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = conjg( v( 1, m ) )*( h( k+1, k+1 )+conjg( v( 2, m ) )*h( k+2, k+1 )+& + conjg( v( 3, m ) )*h( k+3, k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is czero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& + k ) = czero + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = conjg( v( 1, m ) )*( h( k+1, j )+conjg( v( 2, m ) )*h( k+2, j )+& + conjg( v( 3, m ) )*h( k+3, j ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m ) ) + u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m ) ) + z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_wgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& + jcol ), ldh, czero, wh,ldwh ) + call stdlib_wlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_wgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_wlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_wgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & + u( k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_wlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_wlaqr5 + + !> ZLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_wlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqsb + + !> ZLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_wlaqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqsp + + !> ZLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_wlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: amax, scond + ! Array Arguments + real(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: thresh = 0.1e+0_qp + + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_qlamch( 'SAFE MINIMUM' ) / stdlib_qlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_wlaqsy + + !> ZLAQZ0: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by ZGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices, P and S are an upper triangular + !> matrices. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the unitary factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_wlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & + beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + complex(qp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & + alpha( * ), beta( * ), work( * ) + real(qp), intent( out ) :: rwork( * ) + + + ! local scalars + real(qp) :: smlnum, ulp, safmin, safmax, c1, tempr + complex(qp) :: eshift, s1, temp + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_whgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& + ldq, z, ldz, work, lwork, rwork,info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_wlaqz2 + nw = max( nwr, nmin ) + call stdlib_wlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_qeflated, alpha,beta, work, nw, work, nw, work, -1, rwork, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_wlaqz3 + call stdlib_wlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & + ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=qp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_wlaset( 'FULL', n, n, czero, cone, q,ldq ) + if( iwantz==3 ) call stdlib_wlaset( 'FULL', n, n, czero, cone, z,ldz ) + ! get machine constants + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp)/ulp ) + istart = ilo + istop = ihi + maxit = 30*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & + a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = czero + istop = istop-1 + ld = 0 + eshift = czero + end if + ! check deflations at the start + if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& + abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = czero + istart = istart+1 + ld = 0 + eshift = czero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = czero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + tempr = zero + if( k < istop ) then + tempr = tempr+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + tempr = tempr+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_wlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = czero + call stdlib_wrot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_wrot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_wrot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = czero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using qz_small because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_wlaqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_qeflated,alpha, beta, work, nw, work( nw**2+1 ), nw,work( & + 2*nw**2+1 ), lwork-2*nw**2, rwork, rec,aed_info ) + if ( n_qeflated > 0 ) then + istop = istop-n_qeflated + ld = 0 + eshift = czero + end if + if ( 100*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_qeflated-n_undeflated+1 + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=qp)*safmin )*abs( a( istop,istop-1 ) ) ZLAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_wlaqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + + ! local variables + real(qp) :: c + complex(qp) :: s, temp + if( k+1 == ihi ) then + ! shift is located on the edge of the matrix, remove it + call stdlib_wlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = czero + call stdlib_wrot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c, s ) + + call stdlib_wrot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c, s ) + + if ( ilz ) then + call stdlib_wrot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c, s ) + + end if + else + ! normal operation, move bulge down + ! apply transformation from the right + call stdlib_wlartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) + b( k+1, k+1 ) = temp + b( k+1, k ) = czero + call stdlib_wrot( k+2-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c, s ) + + call stdlib_wrot( k-istartm+1, b( istartm, k+1 ), 1, b( istartm, k ),1, c, s ) + + if ( ilz ) then + call stdlib_wrot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c, s ) + + end if + ! apply transformation from the left + call stdlib_wlartg( a( k+1, k ), a( k+2, k ), c, s, temp ) + a( k+1, k ) = temp + a( k+2, k ) = czero + call stdlib_wrot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) + call stdlib_wrot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) + if ( ilq ) then + call stdlib_wrot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c, conjg(& + s ) ) + end if + end if + end subroutine stdlib_wlaqz1 + + !> ZLAQZ2: performs AED + + recursive subroutine stdlib_wlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + complex(qp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & + alpha( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + complex(qp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(out) :: rwork(*) + + + ! local scalars + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(qp) ::smlnum, ulp, safmin, safmax, c1, tempr + complex(qp) :: s, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = czero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_wlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1, rwork, rec+1, qz_small_info ) + lworkreq = int( work( 1 ),KIND=ilp)+2*jw**2 + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ2', -info ) + return + end if + ! get machine constants + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_qlabad( safmin, safmax ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=qp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alpha( kwtop ) = a( kwtop, kwtop ) + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = czero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_wlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_wlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_wlaset( 'FULL', jw, jw, czero, cone, qc, ldqc ) + call stdlib_wlaset( 'FULL', jw, jw, czero, cone, zc, ldzc ) + call stdlib_wlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2*jw**2+1 ), lwork-2*jw**2, rwork,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_wlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_wlacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == czero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + ! try to deflate eigenvalue + tempr = abs( a( kwbot, kwbot ) ) + if( tempr == zero ) then + tempr = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_wtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info ) + k2 = k2+1 + end if + k = k+1 + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + k = k+1 + end do + if ( kwtop /= ilo .and. s /= czero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1,1:jw-nd ) ) + do k = kwbot-1, kwtop, -1 + call stdlib_wlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = czero + k2 = max( kwtop, k-1 ) + call stdlib_wrot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_wrot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_wrot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, conjg( & + s1 ) ) + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + ! move bulge down and remove it + do k2 = k, kwbot-1 + call stdlib_wlaqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & + ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) + end do + k = k-1 + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_wgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & + lda, czero, work, jw ) + call stdlib_wlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_wgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, czero, work, jw ) + call stdlib_wlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_wgemm( 'N', 'N', n, jw, jw, cone, q( 1, kwtop ), ldq, qc,ldqc, czero, & + work, n ) + call stdlib_wlacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_wgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_wlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& + , lda ) + call stdlib_wgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_wlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& + , ldb ) + end if + if ( ilz ) then + call stdlib_wgemm( 'N', 'N', n, jw, jw, cone, z( 1, kwtop ), ldz, zc,ldzc, czero, & + work, n ) + call stdlib_wlacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_wlaqz2 + + !> ZLAQZ3: Executes a single multishift QZ sweep + + pure subroutine stdlib_wlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, alpha,& + beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_qesired, ldqc, ldzc + complex(qp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( & + ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) + integer(ilp), intent( out ) :: info + + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(qp) :: safmin, safmax, c, scale + complex(qp) :: temp, temp2, temp3, s + info = 0 + if ( nblock_qesired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_qesired + return + else if ( lwork < n*nblock_qesired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ3', -info ) + return + end if + ! executable statements + ! get machine constants + safmin = stdlib_qlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_qlabad( safmin, safmax ) + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ns = nshifts + npos = max( nblock_qesired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_wlaset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) + call stdlib_wlaset( 'FULL', ns, ns, czero, cone, zc, ldzc ) + do i = 1, ns + ! introduce the shift + scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) + if( scale >= safmin .and. scale <= safmax ) then + alpha( i ) = alpha( i )/scale + beta( i ) = beta( i )/scale + end if + temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) + temp3 = beta( i )*a( ilo+1, ilo ) + if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then + temp2 = cone + temp3 = czero + end if + call stdlib_wlartg( temp2, temp3, c, s, temp ) + call stdlib_wrot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) + call stdlib_wrot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) + call stdlib_wrot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c,conjg( s ) ) + ! chase the shift down + do j = 1, ns-i + call stdlib_wlaqz1( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& + ns ), lda, czero, work, sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& + ns ), ldb, czero, work, sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_wgemm( 'N', 'N', n, sheight, sheight, cone, q( 1, ilo ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_wlacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_wgemm( 'N', 'N', n, swidth, swidth, cone, z( 1, ilo ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_wlacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_wlaset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) + call stdlib_wlaset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -1 + do j = 0, np-1 + ! move down the block with index k+i+j, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_wlaqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& + ns+np ), lda, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& + ns+np ), ldb, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_wgemm( 'N', 'N', n, nblock, nblock, cone, q( 1, k+1 ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_wlacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_wgemm( 'N', 'N', n, nblock, nblock, cone, z( 1, k ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_wlacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_wlaset( 'FULL', ns, ns, czero, cone, qc, ldqc ) + call stdlib_wlaset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns + ! chase the shift down to the bottom right corner + do ishift = ihi-i, ihi-1 + call stdlib_wlaqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, czero, work, sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_wgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, czero, work, sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_wgemm( 'N', 'N', n, ns, ns, cone, q( 1, ihi-ns+1 ), ldq,qc, ldqc, czero,& + work, n ) + call stdlib_wlacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & + lda, zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_wgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & + ldb, zc, ldzc, czero, work,sheight ) + call stdlib_wlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_wgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1, ihi-ns ), ldz,zc, ldzc, & + czero, work, n ) + call stdlib_wlacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_wlaqz3 + + !> ZLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_wlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(qp), intent(in) :: gaptol, lambda, pivmin + real(qp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(qp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: z(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(qp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs,real + ! Executable Statements + eps = stdlib_qlamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus ZLAR2V: applies a vector of complex plane rotations with real cosines + !> from both sides to a sequence of 2-by-2 complex Hermitian matrices, + !> defined by the elements of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := + !> ( conjg(z(i)) y(i) ) + !> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) + !> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_wlar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(qp), intent(in) :: c(*) + complex(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(qp) :: ci, sii, sir, t1i, t1r, t5, t6, xi, yi, zii, zir + complex(qp) :: si, t2, t3, t4, zi + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = real( x( ix ),KIND=qp) + yi = real( y( ix ),KIND=qp) + zi = z( ix ) + zir = real( zi,KIND=qp) + zii = aimag( zi ) + ci = c( ic ) + si = s( ic ) + sir = real( si,KIND=qp) + sii = aimag( si ) + t1r = sir*zir - sii*zii + t1i = sir*zii + sii*zir + t2 = ci*zi + t3 = t2 - conjg( si )*xi + t4 = conjg( t2 ) + si*yi + t5 = ci*xi + t1r + t6 = ci*yi - t1r + x( ix ) = ci*t5 + ( sir*real( t4,KIND=qp)+sii*aimag( t4 ) ) + y( ix ) = ci*t6 - ( sir*real( t3,KIND=qp)-sii*aimag( t3 ) ) + z( ix ) = ci*t3 + conjg( si )*cmplx( t6, t1i,KIND=qp) + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_wlar2v + + !> ZLARCM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by M and real; B is M by N and complex; + !> C is M by N and complex. + + pure subroutine stdlib_wlarcm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: b(ldb,*) + complex(qp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( b( i, j ),KIND=qp) + end do + end do + l = m*n + 1 + call stdlib_qgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( b( i, j ) ) + end do + end do + call stdlib_qgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=qp),rwork( l+( j-1 )*m+i-1 ),KIND=qp) + + end do + end do + return + end subroutine stdlib_wlarcm + + !> ZLARF: applies a complex elementary reflector H to a complex M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H, supply conjg(tau) instead + !> tau. + + pure subroutine stdlib_wlarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + complex(qp), intent(in) :: tau + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(in) :: v(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=czero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-czero row in v. + do while( lastv>0 .and. v( i )==czero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-czero column in c(1:lastv,:). + lastc = stdlib_ilawlc(lastv, n, c, ldc) + else + ! scan for the last non-czero row in c(:,1:lastv). + lastc = stdlib_ilawlr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_qp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**h * v(1:lastv,1) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', lastv, lastc, cone,c, ldc, v, incv, & + czero, work, 1 ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**h + call stdlib_wgerc( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_wgemv( 'NO TRANSPOSE', lastc, lastv, cone, c, ldc,v, incv, czero, & + work, 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**h + call stdlib_wgerc( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_wlarf + + !> ZLARFB: applies a complex block reflector H or its transpose H**H to a + !> complex M-by-N matrix C, from either the left or the right. + + pure subroutine stdlib_wlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(in) :: t(ldt,*), v(ldv,*) + complex(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_wcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_wlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h * v2 + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c( k+1, 1 ), ldc,v( k+1, 1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_wtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c2 := c2 - v2 * w**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v( k+1, 1 ), ldv, work,ldwork, cone, c( k+1, 1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_wcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c2 := c2 - w * v2**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v( k+1, 1 ),ldv, cone, c( 1, k+1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_wcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_wlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( m-& + k+1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**h * v1 + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_wtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c1 := c1 - v1 * w**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v, ldv, work, ldwork,cone, c, ldc ) + end if + ! w := w * v2**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( m-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_wcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( n-& + k+1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c, ldc, & + v, ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c1 := c1 - w * v1**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v, ldv, cone,c, ldc ) + end if + ! w := w * v2**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( n-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_wcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_wlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h * v2**h + call stdlib_wgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone,c( k+1, 1 ), ldc, v( 1, k+1 ), ldv, cone,work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_wtrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c2 := c2 - v2**h * w**h + call stdlib_wgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone,v( 1, k+1 ), ldv, work, ldwork, cone,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_wcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c( 1, k+1 ), ldc,v( 1, k+1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_wtrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v( 1, k+1 ), ldv, cone,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_wcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_wlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( 1, m-k+1 ), ldv, work,ldwork ) + if( m>k ) then + ! w := w + c1**h * v1**h + call stdlib_wgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone, c,ldc, v, ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_wtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c1 := c1 - v1**h * w**h + call stdlib_wgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone, v,ldv, work, ldwork, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_wcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( 1, n-k+1 ), ldv, work,ldwork ) + if( n>k ) then + ! w := w + c1 * v1**h + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_wtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v, ldv, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_wtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_wlarfb + + !> ZLARFB_GETT: applies a complex Householder block reflector H from the + !> left to a complex (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_wlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(in) :: t(ldt,*) + complex(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_wcopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_wtrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_wgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1, k+1 ), ldb, cone, & + work, ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_wtrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_wgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1, & + k+1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_wtrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_wcopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = czero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_wtrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_wtrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_wtrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_wtrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_wlarfb_gett + + !> ZLARFG: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, with beta real, and x is an + !> (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + !> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + pure subroutine stdlib_wlarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(qp), intent(inout) :: alpha + complex(qp), intent(out) :: tau + ! Array Arguments + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(qp) :: alphi, alphr, beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_qznrm2( n-1, x, incx ) + alphr = real( alpha,KIND=qp) + alphi = aimag( alpha ) + if( xnorm==zero .and. alphi==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_qlapy3( alphr, alphi, xnorm ), alphr ) + safmin = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'E' ) + rsafmn = one / safmin + knt = 0 + if( abs( beta ) ZLARFGP: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is real and non-negative, and + !> x is an (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + + subroutine stdlib_wlarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(qp), intent(inout) :: alpha + complex(qp), intent(out) :: tau + ! Array Arguments + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(qp) :: alphi, alphr, beta, bignum, smlnum, xnorm + complex(qp) :: savealpha + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_qznrm2( n-1, x, incx ) + alphr = real( alpha,KIND=qp) + alphi = aimag( alpha ) + if( xnorm==zero ) then + ! h = [1-alpha/abs(alpha) 0; 0 i], sign chosen so alpha >= 0. + if( alphi==zero ) then + if( alphr>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = -alpha + end if + else + ! only "reflecting" the diagonal entry to be real and non-negative. + xnorm = stdlib_qlapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=qp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = xnorm + end if + else + ! general case + beta = sign( stdlib_qlapy3( alphr, alphi, xnorm ), alphr ) + smlnum = stdlib_qlamch( 'S' ) / stdlib_qlamch( 'E' ) + bignum = one / smlnum + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = real( -savealpha,KIND=qp) + end if + else + xnorm = stdlib_qlapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=qp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = xnorm + end if + else + ! this is the general case. + call stdlib_wscal( n-1, alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_wlarfgp + + !> ZLARFT: forms the triangular factor T of a complex block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + + pure subroutine stdlib_wlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(qp), intent(out) :: t(ldt,*) + complex(qp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( prevlastv, i ) + if( tau( i )==czero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = czero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * conjg( v( i , j ) ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**h * v(i:j,i) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', j-i, i-1,-tau( i ), v( i+1, 1 ), & + ldv,v( i+1, i ), 1, cone, t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**h + call stdlib_wgemm( 'N', 'C', i-1, 1, j-i, -tau( i ),v( 1, i+1 ), ldv, v( i,& + i+1 ), ldv,cone, t( 1, i ), ldt ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_wtrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_wlarft + + !> ZLARFX: applies a complex elementary reflector H to a complex m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_wlarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + complex(qp), intent(in) :: tau + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(in) :: v(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + complex(qp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, & + v6, v7, v8, v9 + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( tau==czero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_wlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + v10 = conjg( v( 10 ) ) + t10 = tau*conjg( v10 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_wlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + v10 = v( 10 ) + t10 = tau*conjg( v10 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 continue + return + end subroutine stdlib_wlarfx + + !> ZLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n Hermitian matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_wlarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + complex(qp), intent(in) :: tau + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(in) :: v(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + complex(qp) :: alpha + ! Executable Statements + if( tau==czero )return + ! form w:= c * v + call stdlib_whemv( uplo, n, cone, c, ldc, v, incv, czero, work, 1 ) + alpha = -chalf*tau*stdlib_wdotc( n, work, 1, v, incv ) + call stdlib_waxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_wher2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_wlarfy + + !> ZLARGV: generates a vector of complex plane rotations with real + !> cosines, determined by elements of the complex vectors x and y. + !> For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) + !> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) + !> where c(i)**2 + ABS(s(i))**2 = 1 + !> The following conventions are used (these are the same as in ZLARTG, + !> but differ from the BLAS1 routine ZROTG): + !> If y(i)=0, then c(i)=1 and s(i)=0. + !> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. + + pure subroutine stdlib_wlargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(qp), intent(out) :: c(*) + complex(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i, ic, ix, iy, j + real(qp) :: cs, d, di, dr, eps, f2, f2s, g2, g2s, safmin, safmn2, safmx2, scale + complex(qp) :: f, ff, fs, g, gs, r, sn + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,log,max,sqrt + ! Statement Functions + real(qp) :: abs1, abssq + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Statement Function Definitions + abs1( ff ) = max( abs( real( ff,KIND=qp) ), abs( aimag( ff ) ) ) + abssq( ff ) = real( ff,KIND=qp)**2 + aimag( ff )**2 + ! Executable Statements + ! if( first ) then + ! first = .false. + safmin = stdlib_qlamch( 'S' ) + eps = stdlib_qlamch( 'E' ) + safmn2 = stdlib_qlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_qlamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! end if + ix = 1 + iy = 1 + ic = 1 + loop_60: do i = 1, n + f = x( ix ) + g = y( iy ) + ! use identical algorithm as in stdlib_wlartg + scale = max( abs1( f ), abs1( g ) ) + fs = f + gs = g + count = 0 + if( scale>=safmx2 ) then + 10 continue + count = count + 1 + fs = fs*safmn2 + gs = gs*safmn2 + scale = scale*safmn2 + if( scale>=safmx2 .and. count < 20 )go to 10 + else if( scale<=safmn2 ) then + if( g==czero ) then + cs = one + sn = czero + r = f + go to 50 + end if + 20 continue + count = count - 1 + fs = fs*safmx2 + gs = gs*safmx2 + scale = scale*safmx2 + if( scale<=safmn2 )go to 20 + end if + f2 = abssq( fs ) + g2 = abssq( gs ) + if( f2<=max( g2, one )*safmin ) then + ! this is a rare case: f is very small. + if( f==czero ) then + cs = zero + r = stdlib_qlapy2( real( g,KIND=qp), aimag( g ) ) + ! do complex/real division explicitly with two real + ! divisions + d = stdlib_qlapy2( real( gs,KIND=qp), aimag( gs ) ) + sn = cmplx( real( gs,KIND=qp) / d, -aimag( gs ) / d,KIND=qp) + go to 50 + end if + f2s = stdlib_qlapy2( real( fs,KIND=qp), aimag( fs ) ) + ! g2 and g2s are accurate + ! g2 is at least safmin, and g2s is at least safmn2 + g2s = sqrt( g2 ) + ! error in cs from underflow in f2s is at most + ! unfl / safmn2 .lt. sqrt(unfl*eps) .lt. eps + ! if max(g2,one)=g2, then f2 .lt. g2*safmin, + ! and so cs .lt. sqrt(safmin) + ! if max(g2,one)=one, then f2 .lt. safmin + ! and so cs .lt. sqrt(safmin)/safmn2 = sqrt(eps) + ! therefore, cs = f2s/g2s / sqrt( 1 + (f2s/g2s)**2 ) = f2s/g2s + cs = f2s / g2s + ! make sure abs(ff) = 1 + ! do complex/real division explicitly with 2 real divisions + if( abs1( f )>one ) then + d = stdlib_qlapy2( real( f,KIND=qp), aimag( f ) ) + ff = cmplx( real( f,KIND=qp) / d, aimag( f ) / d,KIND=qp) + else + dr = safmx2*real( f,KIND=qp) + di = safmx2*aimag( f ) + d = stdlib_qlapy2( dr, di ) + ff = cmplx( dr / d, di / d,KIND=qp) + end if + sn = ff*cmplx( real( gs,KIND=qp) / g2s, -aimag( gs ) / g2s,KIND=qp) + r = cs*f + sn*g + else + ! this is the most common case. + ! neither f2 nor f2/g2 are less than safmin + ! f2s cannot overflow, and it is accurate + f2s = sqrt( one+g2 / f2 ) + ! do the f2s(real)*fs(complex) multiply with two real + ! multiplies + r = cmplx( f2s*real( fs,KIND=qp), f2s*aimag( fs ),KIND=qp) + cs = one / f2s + d = f2 + g2 + ! do complex/real division explicitly with two real divisions + sn = cmplx( real( r,KIND=qp) / d, aimag( r ) / d,KIND=qp) + sn = sn*conjg( gs ) + if( count/=0 ) then + if( count>0 ) then + do j = 1, count + r = r*safmx2 + end do + else + do j = 1, -count + r = r*safmn2 + end do + end if + end if + end if + 50 continue + c( ic ) = cs + y( iy ) = sn + x( ix ) = r + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_60 + return + end subroutine stdlib_wlargv + + !> ZLARNV: returns a vector of n random complex numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_wlarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + complex(qp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(qp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_qp + + + + ! Local Scalars + integer(ilp) :: i, il, iv + ! Local Arrays + real(qp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cmplx,exp,log,min,sqrt + ! Executable Statements + do 60 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + ! call stdlib_qlaruv to generate 2*il realnumbers from a uniform (0,1,KIND=qp) + ! distribution (2*il <= lv) + call stdlib_qlaruv( iseed, 2*il, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = cmplx( u( 2*i-1 ), u( 2*i ),KIND=qp) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = cmplx( two*u( 2*i-1 )-one,two*u( 2*i )-one,KIND=qp) + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*exp( cmplx( zero, twopi*u( 2*i ),& + KIND=qp) ) + end do + else if( idist==4 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit disk + do i = 1, il + x( iv+i-1 ) = sqrt( u( 2*i-1 ) )*exp( cmplx( zero, twopi*u( 2*i ),KIND=qp) ) + + end do + else if( idist==5 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit circle + do i = 1, il + x( iv+i-1 ) = exp( cmplx( zero, twopi*u( 2*i ),KIND=qp) ) + end do + end if + 60 continue + return + end subroutine stdlib_wlarnv + + !> ZLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by DLARRE. + + pure subroutine stdlib_wlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(qp), intent(in) :: minrgp, pivmin, vl, vu + real(qp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(qp), intent(in) :: gers(*) + real(qp), intent(out) :: work(*) + complex(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + integer(ilp) :: indin1, indin2 + real(qp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + intrinsic :: cmplx + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indin1 = 3*n + 1 + indin2 = 4*n + 1 + indwrk = 5*n + 1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = cmplx( one, zero,KIND=qp) + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_qcopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + do k = 1, in - 1 + d( ibegin+k-1 ) = real( z( ibegin+k-1,j ),KIND=qp) + l( ibegin+k-1 ) = real( z( ibegin+k-1,j+1 ),KIND=qp) + end do + d( iend ) = real( z( iend, j ),KIND=qp) + sigma = real( z( iend, j+1 ),KIND=qp) + ! set the corresponding entries in z to zero + call stdlib_wlaset( 'FULL', in, 2, czero, czero,z( ibegin, j), ldz ) + + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_qlarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_qlarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_qlarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_qlarrf needs lwork = 2*n + call stdlib_qlarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,work( indin1 ), work( indin2 ),work( indwrk ), iinfo ) + + ! in the complex case, stdlib_qlarrf cannot write + ! the new rrr directly into z and needs an intermediate + ! workspace + do k = 1, in-1 + z( ibegin+k-1, newftt ) =cmplx( work( indin1+k-1 ), zero,KIND=qp) + + z( ibegin+k-1, newftt+1 ) =cmplx( work( indin2+k-1 ), zero,KIND=qp) + + end do + z( iend, newftt ) =cmplx( work( indin1+in-1 ), zero,KIND=qp) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_qlarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = cmplx( ssigma, zero,KIND=qp) + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=qp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_qlarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_wlar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_wdscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex ! + !> + !> ZLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -conjg(S) C ] [ G ] [ 0 ] + !> where C is real and C**2 + |S|**2 = 1. + !> The mathematical formulas used for C and S are + !> sgn(x) = { x / |x|, x != 0 + !> { 1, x = 0 + !> R = sgn(F) * sqrt(|F|**2 + |G|**2) + !> C = |F| / sqrt(|F|**2 + |G|**2) + !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) + !> When F and G are real, the formulas simplify to C = F/R and + !> S = G/R, and the returned values of C, S, and R should be + !> identical to those returned by DLARTG. + !> The algorithm used to compute these quantities incorporates scaling + !> to avoid overflow or underflow in computing the square root of the + !> sum of squares. + !> This is a faster version of the BLAS1 routine ZROTG, except for + !> the following differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0, then C=0 and S is chosen so that R is real. + !> Below, wp=>dp stands for quad precision from LA_CONSTANTS module. + + pure subroutine stdlib_wlartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(qp), intent(out) :: c + complex(qp), intent(in) :: f, g + complex(qp), intent(out) :: r, s + ! Local Scalars + real(qp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(qp) :: fs, gs, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(qp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=qp)**2 + aimag( t )**2 + ! Executable Statements + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=qp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=qp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=qp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + return + end subroutine stdlib_wlartg + + !> ZLARTV: applies a vector of complex plane rotations with real cosines + !> to elements of the complex vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) + + pure subroutine stdlib_wlartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(qp), intent(in) :: c(*) + complex(qp), intent(in) :: s(*) + complex(qp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + complex(qp) :: xi, yi + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - conjg( s( ic ) )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_wlartv + + !> ZLARZ: applies a complex elementary reflector H to a complex + !> M-by-N matrix C, from either the left or the right. H is represented + !> in the form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + !> H is a product of k elementary reflectors as returned by ZTZRZF. + + pure subroutine stdlib_wlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + complex(qp), intent(in) :: tau + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(in) :: v(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=czero ) then + ! w( 1:n ) = conjg( c( 1, 1:n ) ) + call stdlib_wcopy( n, c, ldc, work, 1 ) + call stdlib_wlacgv( n, work, 1 ) + ! w( 1:n ) = conjg( w( 1:n ) + c( m-l+1:m, 1:n )**h * v( 1:l ) ) + call stdlib_wgemv( 'CONJUGATE TRANSPOSE', l, n, cone, c( m-l+1, 1 ),ldc, v, incv,& + cone, work, 1 ) + call stdlib_wlacgv( n, work, 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_waxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**h + call stdlib_wgeru( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=czero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_wcopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_wgemv( 'NO TRANSPOSE', m, l, cone, c( 1, n-l+1 ), ldc,v, incv, cone, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_waxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**h + call stdlib_wgerc( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_wlarz + + !> ZLARZB: applies a complex block reflector H or its transpose H**H + !> to a complex distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_wlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + complex(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**h + do j = 1, k + call stdlib_wcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**h * v( 1:k, 1:l )**t + if( l>0 )call stdlib_wgemm( 'TRANSPOSE', 'CONJUGATE TRANSPOSE', n, k, l,cone, c( m-& + l+1, 1 ), ldc, v, ldv, cone, work,ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_wtrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, cone, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**h + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**h * w( 1:n, 1:k )**h + if( l>0 )call stdlib_wgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -cone, v, ldv,work, & + ldwork, cone, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_wcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**h + if( l>0 )call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, cone,c( 1, n-l+1 )& + , ldc, v, ldv, cone, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * conjg( t ) or + ! w( 1:m, 1:k ) * t**h + do j = 1, k + call stdlib_wlacgv( k-j+1, t( j, j ), 1 ) + end do + call stdlib_wtrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, cone, t,ldt, work, & + ldwork ) + do j = 1, k + call stdlib_wlacgv( k-j+1, t( j, j ), 1 ) + end do + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * conjg( v( 1:k, 1:l ) ) + do j = 1, l + call stdlib_wlacgv( k, v( 1, j ), 1 ) + end do + if( l>0 )call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -cone,work, & + ldwork, v, ldv, cone, c( 1, n-l+1 ), ldc ) + do j = 1, l + call stdlib_wlacgv( k, v( 1, j ), 1 ) + end do + end if + return + end subroutine stdlib_wlarzb + + !> ZLARZT: forms the triangular factor T of a complex block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_wlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(qp), intent(out) :: t(ldt,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i ZLASCL: multiplies the M by N complex matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_wlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(qp), intent(in) :: cfrom, cto + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(qp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_qisnan(cfrom) ) then + info = -4 + else if( stdlib_qisnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower chalf of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper chalf of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_wlascl + + !> ZLASET: initializes a 2-D array A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_wlaset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + complex(qp), intent(in) :: alpha, beta + ! Array Arguments + complex(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the diagonal to beta and the strictly upper triangular + ! part of the array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the diagonal to beta and the strictly lower triangular + ! part of the array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else + ! set the array to beta on the diagonal and alpha on the + ! offdiagonal. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( m, n ) + a( i, i ) = beta + end do + end if + return + end subroutine stdlib_wlaset + + !> ZLASR: applies a sequence of real plane rotations to a complex matrix + !> A, from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + + pure subroutine stdlib_wlasr( side, pivot, direct, m, n, c, s, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, pivot, side + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(qp), intent(in) :: c(*), s(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + real(qp) :: ctemp, stemp + complex(qp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.( stdlib_lsame( side, 'L' ) .or. stdlib_lsame( side, 'R' ) ) ) then + info = 1 + else if( .not.( stdlib_lsame( pivot, 'V' ) .or. stdlib_lsame( pivot,'T' ) .or. & + stdlib_lsame( pivot, 'B' ) ) ) then + info = 2 + else if( .not.( stdlib_lsame( direct, 'F' ) .or. stdlib_lsame( direct, 'B' ) ) )& + then + info = 3 + else if( m<0 ) then + info = 4 + else if( n<0 ) then + info = 5 + else if( lda ! + !> + !> ZLASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + + pure subroutine stdlib_wlassq( n, x, incx, scl, sumsq ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(qp), intent(inout) :: scl, sumsq + ! Array Arguments + complex(qp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(qp) :: abig, amed, asml, ax, ymax, ymin + ! quick return if possible + if( ieee_is_nan(scl) .or. ieee_is_nan(sumsq) ) return + if( sumsq == zero ) scl = one + if( scl == zero ) then + scl = one + sumsq = zero + end if + if (n <= 0) then + return + end if + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=qp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! put the existing sum of squares into one of the accumulators + if( sumsq > zero ) then + ax = scl*sqrt( sumsq ) + if (ax > tbig) then + ! we assume scl >= sqrt( tiny*eps ) / sbig + abig = abig + (scl*sbig)**2 * sumsq + else if (ax < tsml) then + ! we assume scl <= sqrt( huge ) / ssml + if (notbig) asml = asml + (scl*ssml)**2 * sumsq + else + amed = amed + scl**2 * sumsq + end if + end if + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range or zero + scl = one + sumsq = amed + end if + return + end subroutine stdlib_wlassq + + !> ZLASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a complexx M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + + pure subroutine stdlib_wlaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, lwork, ldt + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. nm .and. m>0 )) then + info = -3 + else if( nb<=0 ) then + info = -4 + else if( lda=n).or.(nb<=m).or.(nb>=n)) then + call stdlib_wgelqt( m, n, mb, a, lda, t, ldt, work, info) + return + end if + kk = mod((n-m),(nb-m)) + ii=n-kk+1 + ! compute the lq factorization of the first block a(1:m,1:nb) + call stdlib_wgelqt( m, nb, mb, a(1,1), lda, t, ldt, work, info) + ctr = 1 + do i = nb+1, ii-nb+m , (nb-m) + ! compute the qr factorization of the current block a(1:m,i:i+nb-m) + call stdlib_wtplqt( m, nb-m, 0, mb, a(1,1), lda, a( 1, i ),lda, t(1, ctr * m + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(1:m,ii:n) + if (ii<=n) then + call stdlib_wtplqt( m, kk, 0, mb, a(1,1), lda, a( 1, ii ),lda, t(1, ctr * m + 1), & + ldt,work, info ) + end if + work( 1 ) = m * mb + return + end subroutine stdlib_wlaswlq + + !> ZLASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + + pure subroutine stdlib_wlaswp( n, a, lda, k1, k2, ipiv, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k1, k2, lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32 + complex(qp) :: temp + ! Executable Statements + ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows + ! k1 through k2. + if( incx>0 ) then + ix0 = k1 + i1 = k1 + i2 = k2 + inc = 1 + else if( incx<0 ) then + ix0 = k1 + ( k1-k2 )*incx + i1 = k2 + i2 = k1 + inc = -1 + else + return + end if + n32 = ( n / 32 )*32 + if( n32/=0 ) then + do j = 1, n32, 32 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = j, j + 31 + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end do + end if + if( n32/=n ) then + n32 = n32 + 1 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = n32, n + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end if + return + end subroutine stdlib_wlaswp + + !> ZLASYF: computes a partial factorization of a complex symmetric matrix + !> A using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**T denotes the transpose of U. + !> ZLASYF is an auxiliary routine called by ZSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(qp) :: absakk, alpha, colmax, rowmax + complex(qp) :: d11, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column kw-1 of w and update it + call stdlib_wcopy( imax, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + call stdlib_wcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + if( k1 ) then + jmax = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, kw-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_wcopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_wcopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + if( kp>1 )call stdlib_wcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k2 ) then + ! compose the columns of the inverse of 2-by-2 pivot + ! block d in the following way to reduce the number + ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by + ! this inverse + ! d**(-1) = ( d11 d21 )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = + ! ( (-d21 ) ( d11 ) ) + ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * + ! * ( ( d22/d21 ) ( -1 ) ) = + ! ( ( -1 ) ( d11/d21 ) ) + ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) ( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = 1/d21 * t * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + ! = d21 * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / d21 + d22 = w( k-1, kw-1 ) / d21 + t = cone / ( d11*d22-cone ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = d21*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_wswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + ! copy column k of a to column k of w and update it + call stdlib_wcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ), lda,w( k, 1 ), ldw,& + cone, w( k, k ), 1 ) + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column k+1 of w and update it + call stdlib_wcopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_wcopy( n-imax+1, a( imax, imax ), 1, w( imax, k+1 ),1 ) + call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( imax, & + 1 ), ldw, cone, w( k, k+1 ),1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_iwamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, k+1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_wcopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_wcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + if( kp1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_wswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + call stdlib_wcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_wswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_wlasyf + + !> DLATRF_AA factorizes a panel of a complex symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_wlasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), h(ldh,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(qp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_wsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j, j:m) - h(j:m, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:m, j) has been initialized to be a(j, j:m) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_wgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + end if + ! copy h(i:m, i) into work + call stdlib_wcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:m) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:m) stores u(j-1, j:m) + alpha = -a( k-1, j ) + call stdlib_waxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = work( 1 ) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_waxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_iwamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:m) with a(i1+1:m, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_wswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + ! swap a(i1, i2+1:m) with a(i2, i2+1:m) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_wswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_wsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j:m, j) - h(j:m, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:m, j) has been initialized to be a(j:m, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_wgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + end if + ! copy h(j:m, j) into work + call stdlib_wcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:m, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -a( j, k-1 ) + call stdlib_waxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = work( 1 ) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_waxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_iwamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:m, i1) with a(i2, i1+1:m) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_wswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + ! swap a(i2+1:m, i1) with a(i2+1:m, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_wswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j ZLASYF_RK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLASYF_RK is an auxiliary routine called by ZSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*), w(ldw,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii + real(qp) :: absakk, alpha, colmax, rowmax, sfmin, dtemp + complex(qp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_qlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_wcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_wscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ),ldw, cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_wcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_iwamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_wscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k ZLASYF_ROOK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLASYF_ROOK is an auxiliary routine called by ZSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_wlasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, & + ii + real(qp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(qp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt,aimag,real + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_qlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_iwamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_wcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_wscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_wgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n + j = k + 1 + 60 continue + kstep = 1 + jp1 = 1 + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_wswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = j - 1 + if( jp1/=jj .and. kstep==2 )call stdlib_wswap( n-j+1, a( jp1, j ), lda, a( jj, j & + ), lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_wcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_wgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_iwamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_wscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k=1 )call stdlib_wswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = j + 1 + if( jp1/=jj .and. kstep==2 )call stdlib_wswap( j, a( jp1, 1 ), lda, a( jj, 1 ), & + lda ) + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_wlasyf_rook + + !> ZLAT2C: converts a COMPLEX*16 triangular matrix, SA, to a COMPLEX + !> triangular matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> ZLAT2C checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_wlat2c( uplo, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, n + ! Array Arguments + complex(dp), intent(out) :: sa(ldsa,*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(qp) :: rmax + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,aimag + ! Executable Statements + rmax = stdlib_dlamch( 'O' ) + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + do j = 1, n + do i = 1, j + if( ( real( a( i, j ),KIND=qp)<-rmax ) .or.( real( a( i, j ),KIND=qp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) & + then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = j, n + if( ( real( a( i, j ),KIND=qp)<-rmax ) .or.( real( a( i, j ),KIND=qp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) & + then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + end if + 50 continue + return + end subroutine stdlib_wlat2c + + !> ZLATBS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine ZTBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_wlatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(out) :: scale + ! Array Arguments + real(qp), intent(inout) :: cnorm(*) + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind + real(qp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(qp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(qp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=qp) / 2._qp ) +abs( aimag( zdum ) / 2._qp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( kd<0 ) then + info = -6 + else if( ldab0 ) then + cnorm( j ) = stdlib_qzasum( jlen, ab( 2, j ), 1 ) + else + cnorm( j ) = zero + end if + end do + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_iqamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_qscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_wtbsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + maind = kd + 1 + else + jfirst = 1 + jlast = n + jinc = 1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + maind = kd + 1 + else + jfirst = n + jlast = 1 + jinc = -1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_wtbsv( uplo, trans, diag, n, kd, ab, ldab, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_wdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_wdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - + ! x(j)* a(max(1,j-kd):j-1,j) + jlen = min( kd, j-1 ) + call stdlib_waxpy( jlen, -x( j )*tscal,ab( kd+1-jlen, j ), 1, x( j-jlen & + ), 1 ) + i = stdlib_iwamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else if( j0 )call stdlib_waxpy( jlen, -x( j )*tscal, ab( 2, j ), 1,x( j+1 ),& + 1 ) + i = j + stdlib_iwamax( n-j, x( j+1 ), 1 ) + xmax = cabs1( x( i ) ) + end if + end do loop_120 + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b + loop_170: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_wdotu( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( ab( kd+i-jlen, j )*uscal )*x( j-jlen-1+i ) + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( ab( i+1, j )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=qp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 160 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_170 + else + ! solve a**h * x = b + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_wdotc( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( kd+i-jlen, j ) )*uscal )*x( j-jlen-1+i ) + + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( i+1, j ) )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=qp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + if( tscal==one )go to 210 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_qscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_wlatbs + + !> ZLATDF: computes the contribution to the reciprocal Dif-estimate + !> by solving for x in Z * x = b, where b is chosen such that the norm + !> of x is as large as possible. It is assumed that LU decomposition + !> of Z has been computed by ZGETC2. On entry RHS = f holds the + !> contribution from earlier solved sub-systems, and on return RHS = x. + !> The factorization of Z returned by ZGETC2 has the form + !> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower + !> triangular with unit diagonal elements and U is upper triangular. + + pure subroutine stdlib_wlatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, ldz, n + real(qp), intent(inout) :: rdscal, rdsum + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(qp), intent(inout) :: rhs(*), z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxdim = 2 + + + + ! Local Scalars + integer(ilp) :: i, info, j, k + real(qp) :: rtemp, scale, sminu, splus + complex(qp) :: bm, bp, pmone, temp + ! Local Arrays + real(qp) :: rwork(maxdim) + complex(qp) :: work(4*maxdim), xm(maxdim), xp(maxdim) + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( ijob/=2 ) then + ! apply permutations ipiv to rhs + call stdlib_wlaswp( 1, rhs, ldz, 1, n-1, ipiv, 1 ) + ! solve for l-part choosing rhs either to +1 or -1. + pmone = -cone + loop_10: do j = 1, n - 1 + bp = rhs( j ) + cone + bm = rhs( j ) - cone + splus = one + ! lockahead for l- part rhs(1:n-1) = +-1 + ! splus and smin computed more efficiently than in bsolve[1]. + splus = splus + real( stdlib_wdotc( n-j, z( j+1, j ), 1, z( j+1,j ), 1 ),KIND=qp) + + sminu = real( stdlib_wdotc( n-j, z( j+1, j ), 1, rhs( j+1 ), 1 ),KIND=qp) + splus = splus*real( rhs( j ),KIND=qp) + if( splus>sminu ) then + rhs( j ) = bp + else if( sminu>splus ) then + rhs( j ) = bm + else + ! in this case the updating sums are equal and we can + ! choose rhs(j) +1 or -1. the first time this happens we + ! choose -1, thereafter +1. this is a simple way to get + ! good estimates of matrices like byers well-known example + ! (see [1]). (not done in bsolve.) + rhs( j ) = rhs( j ) + pmone + pmone = cone + end if + ! compute the remaining r.h.s. + temp = -rhs( j ) + call stdlib_waxpy( n-j, temp, z( j+1, j ), 1, rhs( j+1 ), 1 ) + end do loop_10 + ! solve for u- part, lockahead for rhs(n) = +-1. this is not done + ! in bsolve and will hopefully give us a better estimate because + ! any ill-conditioning of the original matrix is transferred to u + ! and not to l. u(n, n) is an approximation to sigma_min(lu). + call stdlib_wcopy( n-1, rhs, 1, work, 1 ) + work( n ) = rhs( n ) + cone + rhs( n ) = rhs( n ) - cone + splus = zero + sminu = zero + do i = n, 1, -1 + temp = cone / z( i, i ) + work( i ) = work( i )*temp + rhs( i ) = rhs( i )*temp + do k = i + 1, n + work( i ) = work( i ) - work( k )*( z( i, k )*temp ) + rhs( i ) = rhs( i ) - rhs( k )*( z( i, k )*temp ) + end do + splus = splus + abs( work( i ) ) + sminu = sminu + abs( rhs( i ) ) + end do + if( splus>sminu )call stdlib_wcopy( n, work, 1, rhs, 1 ) + ! apply the permutations jpiv to the computed solution (rhs) + call stdlib_wlaswp( 1, rhs, ldz, 1, n-1, jpiv, -1 ) + ! compute the sum of squares + call stdlib_wlassq( n, rhs, 1, rdscal, rdsum ) + return + end if + ! entry ijob = 2 + ! compute approximate nullvector xm of z + call stdlib_wgecon( 'I', n, z, ldz, one, rtemp, work, rwork, info ) + call stdlib_wcopy( n, work( n+1 ), 1, xm, 1 ) + ! compute rhs + call stdlib_wlaswp( 1, xm, ldz, 1, n-1, ipiv, -1 ) + temp = cone / sqrt( stdlib_wdotc( n, xm, 1, xm, 1 ) ) + call stdlib_wscal( n, temp, xm, 1 ) + call stdlib_wcopy( n, xm, 1, xp, 1 ) + call stdlib_waxpy( n, cone, rhs, 1, xp, 1 ) + call stdlib_waxpy( n, -cone, xm, 1, rhs, 1 ) + call stdlib_wgesc2( n, z, ldz, rhs, ipiv, jpiv, scale ) + call stdlib_wgesc2( n, z, ldz, xp, ipiv, jpiv, scale ) + if( stdlib_qzasum( n, xp, 1 )>stdlib_qzasum( n, rhs, 1 ) )call stdlib_wcopy( n, xp, 1, & + rhs, 1 ) + ! compute the sum of squares + call stdlib_wlassq( n, rhs, 1, rdscal, rdsum ) + return + end subroutine stdlib_wlatdf + + !> ZLATPS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, A**H denotes the conjugate transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_wlatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(out) :: scale + ! Array Arguments + real(qp), intent(inout) :: cnorm(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen + real(qp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(qp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(qp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=qp) / 2._qp ) +abs( aimag( zdum ) / 2._qp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLATPS', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine machine dependent parameters to control overflow. + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + call stdlib_qlabad( smlnum, bignum ) + smlnum = smlnum / stdlib_qlamch( 'PRECISION' ) + bignum = one / smlnum + scale = one + if( stdlib_lsame( normin, 'N' ) ) then + ! compute the 1-norm of each column, not including the diagonal. + if( upper ) then + ! a is upper triangular. + ip = 1 + do j = 1, n + cnorm( j ) = stdlib_qzasum( j-1, ap( ip ), 1 ) + ip = ip + j + end do + else + ! a is lower triangular. + ip = 1 + do j = 1, n - 1 + cnorm( j ) = stdlib_qzasum( n-j, ap( ip+1 ), 1 ) + ip = ip + n - j + 1 + end do + cnorm( n ) = zero + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_iqamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_qscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_wtpsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + else + jfirst = 1 + jlast = n + jinc = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = n + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + ip = ip + jinc*jlen + jlen = jlen - 1 + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + jlen = jlen + 1 + ip = ip + jinc*jlen + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_wtpsv( uplo, trans, diag, n, ap, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_wdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + ip = jfirst*( jfirst+1 ) / 2 + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_wdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_waxpy( j-1, -x( j )*tscal, ap( ip-j+1 ), 1, x,1 ) + i = stdlib_iwamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + ip = ip - j + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_170 + else + ! solve a**h * x = b + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ap( ip ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_qscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_wlatps + + !> ZLATRD: reduces NB rows and columns of a complex Hermitian matrix A to + !> Hermitian tridiagonal form by a unitary similarity + !> transformation Q**H * A * Q, and returns the matrices V and W which are + !> needed to apply the transformation to the unreduced part of A. + !> If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a + !> matrix, of which the upper triangle is supplied; + !> if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a + !> matrix, of which the lower triangle is supplied. + !> This is an auxiliary routine called by ZHETRD. + + pure subroutine stdlib_wlatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + real(qp), intent(out) :: e(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), w(ldw,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iw + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: real,min + ! Executable Statements + ! quick return if possible + if( n<=0 )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! reduce last nb columns of upper triangle + loop_10: do i = n, n - nb + 1, -1 + iw = i - n + nb + if( i1 ) then + ! generate elementary reflector h(i) to annihilate + ! a(1:i-2,i) + alpha = a( i-1, i ) + call stdlib_wlarfg( i-1, alpha, a( 1, i ), 1, tau( i-1 ) ) + e( i-1 ) = real( alpha,KIND=qp) + a( i-1, i ) = cone + ! compute w(1:i-1,i) + call stdlib_whemv( 'UPPER', i-1, cone, a, lda, a( 1, i ), 1,czero, w( 1, iw ),& + 1 ) + if( i ZLATRS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, A**H denotes the + !> conjugate transpose of A, x and b are n-element vectors, and s is a + !> scaling factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_wlatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: scale + ! Array Arguments + real(qp), intent(inout) :: cnorm(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast + real(qp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(qp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(qp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=qp) / 2._qp ) +abs( aimag( zdum ) / 2._qp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = a( j, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_wtrsv( uplo, trans, diag, n, a, lda, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_wdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_wdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_waxpy( j-1, -x( j )*tscal, a( 1, j ), 1, x,1 ) + i = stdlib_iwamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_170 + else + ! solve a**h * x = b + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( a( j, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_wladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_wdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_wladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_wladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_qscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_wlatrs + + !> ZLATRZ: factors the M-by-(M+L) complex upper trapezoidal matrix + !> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means + !> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary + !> matrix and, R and A1 are M-by-M upper triangular matrices. + + pure subroutine stdlib_wlatrz( m, n, l, a, lda, tau, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: l, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m==0 ) then + return + else if( m==n ) then + do i = 1, n + tau( i ) = czero + end do + return + end if + do i = m, 1, -1 + ! generate elementary reflector h(i) to annihilate + ! [ a(i,i) a(i,n-l+1:n) ] + call stdlib_wlacgv( l, a( i, n-l+1 ), lda ) + alpha = conjg( a( i, i ) ) + call stdlib_wlarfg( l+1, alpha, a( i, n-l+1 ), lda, tau( i ) ) + tau( i ) = conjg( tau( i ) ) + ! apply h(i) to a(1:i-1,i:n) from the right + call stdlib_wlarz( 'RIGHT', i-1, n-i+1, l, a( i, n-l+1 ), lda,conjg( tau( i ) ), a( & + 1, i ), lda, work ) + a( i, i ) = conjg( alpha ) + end do + return + end subroutine stdlib_wlatrz + + !> ZLATSQR: computes a blocked Tall-Skinny QR factorization of + !> a complex M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + + pure subroutine stdlib_wlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, ldt, lwork + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. mn .and. n>0 )) then + info = -4 + else if( lda=m)) then + call stdlib_wgeqrt( m, n, nb, a, lda, t, ldt, work, info) + return + end if + kk = mod((m-n),(mb-n)) + ii=m-kk+1 + ! compute the qr factorization of the first block a(1:mb,1:n) + call stdlib_wgeqrt( mb, n, nb, a(1,1), lda, t, ldt, work, info ) + ctr = 1 + do i = mb+1, ii-mb+n , (mb-n) + ! compute the qr factorization of the current block a(i:i+mb-n,1:n) + call stdlib_wtpqrt( mb-n, n, 0, nb, a(1,1), lda, a( i, 1 ), lda,t(1, ctr * n + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(ii:m,1:n) + if (ii<=m) then + call stdlib_wtpqrt( kk, n, 0, nb, a(1,1), lda, a( ii, 1 ), lda,t(1,ctr * n + 1), & + ldt,work, info ) + end if + work( 1 ) = n*nb + return + end subroutine stdlib_wlatsqr + + !> ZLAUNHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine ZLAUNHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_wlaunhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_wlaunhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_wlaunhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_wtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_wgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_wlaunhr_col_getrfnp + + !> ZLAUNHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> ZLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine ZLAUNHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, ZLAUNHR_COL_GETRFNP2 + !> is self-sufficient and can be used without ZLAUNHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_wlaunhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: d(*) + ! ===================================================================== + + + ! Local Scalars + real(qp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + complex(qp) :: z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_wscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_wlaunhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_wtrsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_wgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_wlaunhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_wlaunhr_col_getrfnp2 + + !> ZLAUU2: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the unblocked form of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wlauu2( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(qp) :: aii + ! Intrinsic Functions + intrinsic :: real,cmplx,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLAUUM: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wlauum( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ib, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_wlauu2( uplo, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute the product u * u**h. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_wtrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', i-1, & + ib, cone, a( i, i ), lda,a( 1, i ), lda ) + call stdlib_wlauu2( 'UPPER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',i-1, ib, n-i-ib+1,& + cone, a( 1, i+ib ),lda, a( i, i+ib ), lda, cone, a( 1, i ),lda ) + call stdlib_wherk( 'UPPER', 'NO TRANSPOSE', ib, n-i-ib+1,one, a( i, i+ib ),& + lda, one, a( i, i ),lda ) + end if + end do + else + ! compute the product l**h * l. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_wtrmm( 'LEFT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', ib, i-1,& + cone, a( i, i ), lda,a( i, 1 ), lda ) + call stdlib_wlauu2( 'LOWER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', ib,i-1, n-i-ib+1,& + cone, a( i+ib, i ), lda,a( i+ib, 1 ), lda, cone, a( i, 1 ), lda ) + call stdlib_wherk( 'LOWER', 'CONJUGATE TRANSPOSE', ib,n-i-ib+1, one, a( i+& + ib, i ), lda, one,a( i, i ), lda ) + end if + end do + end if + end if + return + end subroutine stdlib_wlauum + + !> ZPBCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite band matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> ZPBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(qp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab ZPBEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_wpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(out) :: amax, scond + ! Array Arguments + real(qp), intent(out) :: s(*) + complex(qp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j + real(qp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab ZPBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and banded, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_wpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, l, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_wpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wpbrfs + + !> ZPBSTF: computes a split Cholesky factorization of a complex + !> Hermitian positive definite band matrix A. + !> This routine is designed to be used in conjunction with ZHBGST. + !> The factorization has the form A = S**H*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + + pure subroutine stdlib_wpbstf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, km, m + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_wdscal( km, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_wlacgv( km, ab( kd, j+1 ), kld ) + call stdlib_wher( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_wlacgv( km, ab( kd, j+1 ), kld ) + end if + end do + else + ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m). + do j = n, m + 1, -1 + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=qp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( j-1, kd ) + ! compute elements j-km:j-1 of the j-th row and update the + ! trailing submatrix within the band. + call stdlib_wdscal( km, one / ajj, ab( km+1, j-km ), kld ) + call stdlib_wlacgv( km, ab( km+1, j-km ), kld ) + call stdlib_wher( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1, j-km ), kld ) + + call stdlib_wlacgv( km, ab( km+1, j-km ), kld ) + end do + ! factorize the updated submatrix a(1:m,1:m) as u**h*u. + do j = 1, m + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=qp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( kd, m-j ) + ! compute elements j+1:j+km of the j-th column and update the + ! trailing submatrix within the band. + if( km>0 ) then + call stdlib_wdscal( km, one / ajj, ab( 2, j ), 1 ) + call stdlib_wher( 'LOWER', km, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 50 continue + info = j + return + end subroutine stdlib_wpbstf + + !> ZPBSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_wpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab ZPBSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wpbsvx( fact, uplo, n, kd, nrhs, ab, ldab, afb, ldafb,equed, s, b, ldb, x, & + ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(inout) :: s(*) + complex(qp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ, upper + integer(ilp) :: i, infequ, j, j1, j2 + real(qp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + upper = stdlib_lsame( uplo, 'U' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanhb( '1', uplo, n, kd, ab, ldab, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_wpbcon( uplo, n, kd, afb, ldafb, anorm, rcond, work, rwork,info ) + ! compute the solution matrix x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wpbtrs( uplo, n, kd, nrhs, afb, ldafb, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_wpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPBTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix, U**H is the conjugate transpose + !> of U, and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wpbtf2( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, kn + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_wdscal( kn, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_wlacgv( kn, ab( kd, j+1 ), kld ) + call stdlib_wher( 'UPPER', kn, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_wlacgv( kn, ab( kd, j+1 ), kld ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=qp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + ! compute elements j+1:j+kn of column j and update the + ! trailing submatrix within the band. + kn = min( kd, n-j ) + if( kn>0 ) then + call stdlib_wdscal( kn, one / ajj, ab( 2, j ), 1 ) + call stdlib_wher( 'LOWER', kn, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 30 continue + info = j + return + end subroutine stdlib_wpbtf2 + + !> ZPBTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_wpbtrf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(qp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 32 + integer(ilp), parameter :: ldwork = nbmax+1 + + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ib, ii, j, jj, nb + ! Local Arrays + complex(qp) :: work(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( .not.stdlib_lsame( uplo, 'U' ) ) .and.( .not.stdlib_lsame( uplo, 'L' ) ) ) & + then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldabkd ) then + ! use unblocked code + call stdlib_wpbtf2( uplo, n, kd, ab, ldab, info ) + else + ! use blocked code + if( stdlib_lsame( uplo, 'U' ) ) then + ! compute the cholesky factorization of a hermitian band + ! matrix, given the upper triangle of the matrix in band + ! storage. + ! zero the upper triangle of the work array. + do j = 1, nb + do i = 1, j - 1 + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_70: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_wpotf2( uplo, ib, ab( kd+1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 a12 a13 + ! a22 a23 + ! a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a12, a22 and + ! a23 are empty if ib = kd. the upper triangle of a13 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a12 + call stdlib_wtrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i2, cone,ab( kd+1, i ), ldab-1,ab( kd+1-ib, i+ib ), ldab-1 ) + ! update a22 + call stdlib_wherk( 'UPPER', 'CONJUGATE TRANSPOSE', i2, ib,-one, ab( kd+& + 1-ib, i+ib ), ldab-1, one,ab( kd+1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the lower triangle of a13 into the work array. + do jj = 1, i3 + do ii = jj, ib + work( ii, jj ) = ab( ii-jj+1, jj+i+kd-1 ) + end do + end do + ! update a13 (in the work array). + call stdlib_wtrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i3, cone,ab( kd+1, i ), ldab-1, work, ldwork ) + ! update a23 + if( i2>0 )call stdlib_wgemm( 'CONJUGATE TRANSPOSE','NO TRANSPOSE', i2, & + i3, ib, -cone,ab( kd+1-ib, i+ib ), ldab-1, work,ldwork, cone, ab( 1+ib, & + i+kd ),ldab-1 ) + ! update a33 + call stdlib_wherk( 'UPPER', 'CONJUGATE TRANSPOSE', i3, ib,-one, work, & + ldwork, one,ab( kd+1, i+kd ), ldab-1 ) + ! copy the lower triangle of a13 back into place. + do jj = 1, i3 + do ii = jj, ib + ab( ii-jj+1, jj+i+kd-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_70 + else + ! compute the cholesky factorization of a hermitian band + ! matrix, given the lower triangle of the matrix in band + ! storage. + ! zero the lower triangle of the work array. + do j = 1, nb + do i = j + 1, nb + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_140: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_wpotf2( uplo, ib, ab( 1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 + ! a21 a22 + ! a31 a32 a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a21, a22 and + ! a32 are empty if ib = kd. the lower triangle of a31 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a21 + call stdlib_wtrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i2,ib, cone, ab( 1, i ), ldab-1,ab( 1+ib, i ), ldab-1 ) + ! update a22 + call stdlib_wherk( 'LOWER', 'NO TRANSPOSE', i2, ib, -one,ab( 1+ib, i ), & + ldab-1, one,ab( 1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the upper triangle of a31 into the work array. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + work( ii, jj ) = ab( kd+1-jj+ii, jj+i-1 ) + end do + end do + ! update a31 (in the work array). + call stdlib_wtrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i3,ib, cone, ab( 1, i ), ldab-1, work,ldwork ) + ! update a32 + if( i2>0 )call stdlib_wgemm( 'NO TRANSPOSE','CONJUGATE TRANSPOSE', i3, & + i2, ib,-cone, work, ldwork, ab( 1+ib, i ),ldab-1, cone, ab( 1+kd-ib, i+& + ib ),ldab-1 ) + ! update a33 + call stdlib_wherk( 'LOWER', 'NO TRANSPOSE', i3, ib, -one,work, ldwork, & + one, ab( 1, i+kd ),ldab-1 ) + ! copy the upper triangle of a31 back into place. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + ab( kd+1-jj+ii, jj+i-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_140 + end if + end if + return + 150 continue + return + end subroutine stdlib_wpbtrf + + !> ZPBTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite band matrix A using the Cholesky factorization + !> A = U**H *U or A = L*L**H computed by ZPBTRF. + + pure subroutine stdlib_wpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab ZPFTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wpftrf( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPFTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_wpotrf( 'L', n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_wtrsm( 'R', 'L', 'C', 'N', n2, n1, cone, a( 0 ), n,a( n1 ), n ) + + call stdlib_wherk( 'U', 'N', n2, n1, -one, a( n1 ), n, one,a( n ), n ) + call stdlib_wpotrf( 'U', n2, a( n ), n, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_wpotrf( 'L', n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_wtrsm( 'L', 'L', 'N', 'N', n1, n2, cone, a( n2 ), n,a( 0 ), n ) + + call stdlib_wherk( 'U', 'C', n2, n1, -one, a( 0 ), n, one,a( n1 ), n ) + call stdlib_wpotrf( 'U', n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + call stdlib_wpotrf( 'U', n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_wtrsm( 'L', 'U', 'C', 'N', n1, n2, cone, a( 0 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_wherk( 'L', 'C', n2, n1, -one, a( n1*n1 ), n1, one,a( 1 ), n1 ) + + call stdlib_wpotrf( 'L', n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + call stdlib_wpotrf( 'U', n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_wtrsm( 'R', 'U', 'N', 'N', n2, n1, cone, a( n2*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_wherk( 'L', 'N', n2, n1, -one, a( 0 ), n2, one,a( n1*n2 ), n2 ) + + call stdlib_wpotrf( 'L', n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_wpotrf( 'L', k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_wtrsm( 'R', 'L', 'C', 'N', k, k, cone, a( 1 ), n+1,a( k+1 ), n+1 ) + + call stdlib_wherk( 'U', 'N', k, k, -one, a( k+1 ), n+1, one,a( 0 ), n+1 ) + + call stdlib_wpotrf( 'U', k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_wpotrf( 'L', k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_wtrsm( 'L', 'L', 'N', 'N', k, k, cone, a( k+1 ),n+1, a( 0 ), n+1 ) + + call stdlib_wherk( 'U', 'C', k, k, -one, a( 0 ), n+1, one,a( k ), n+1 ) + + call stdlib_wpotrf( 'U', k, a( k ), n+1, info ) + if( info>0 )info = info + k + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_wpotrf( 'U', k, a( 0+k ), k, info ) + if( info>0 )return + call stdlib_wtrsm( 'L', 'U', 'C', 'N', k, k, cone, a( k ), n1,a( k*( k+1 ) ), & + k ) + call stdlib_wherk( 'L', 'C', k, k, -one, a( k*( k+1 ) ), k, one,a( 0 ), k ) + + call stdlib_wpotrf( 'L', k, a( 0 ), k, info ) + if( info>0 )info = info + k + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_wpotrf( 'U', k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_wtrsm( 'R', 'U', 'N', 'N', k, k, cone,a( k*( k+1 ) ), k, a( 0 ), & + k ) + call stdlib_wherk( 'L', 'N', k, k, -one, a( 0 ), k, one,a( k*k ), k ) + call stdlib_wpotrf( 'L', k, a( k*k ), k, info ) + if( info>0 )info = info + k + end if + end if + end if + return + end subroutine stdlib_wpftrf + + !> ZPFTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPFTRF. + + pure subroutine stdlib_wpftri( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_wtftri( transr, uplo, 'N', n, a, info ) + if( info>0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution of triangular matrix multiply: inv(u)*inv(u)^c or + ! inv(l)^c*inv(l). there are eight cases. + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_wlauum( 'L', n1, a( 0 ), n, info ) + call stdlib_wherk( 'L', 'C', n1, n2, one, a( n1 ), n, one,a( 0 ), n ) + call stdlib_wtrmm( 'L', 'U', 'N', 'N', n2, n1, cone, a( n ), n,a( n1 ), n ) + + call stdlib_wlauum( 'U', n2, a( n ), n, info ) + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_wlauum( 'L', n1, a( n2 ), n, info ) + call stdlib_wherk( 'L', 'N', n1, n2, one, a( 0 ), n, one,a( n2 ), n ) + call stdlib_wtrmm( 'R', 'U', 'C', 'N', n1, n2, cone, a( n1 ), n,a( 0 ), n ) + + call stdlib_wlauum( 'U', n2, a( n1 ), n, info ) + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_wlauum( 'U', n1, a( 0 ), n1, info ) + call stdlib_wherk( 'U', 'N', n1, n2, one, a( n1*n1 ), n1, one,a( 0 ), n1 ) + + call stdlib_wtrmm( 'R', 'L', 'N', 'N', n1, n2, cone, a( 1 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_wlauum( 'L', n2, a( 1 ), n1, info ) + else + ! srpa for upper, transpose, and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_wlauum( 'U', n1, a( n2*n2 ), n2, info ) + call stdlib_wherk( 'U', 'C', n1, n2, one, a( 0 ), n2, one,a( n2*n2 ), n2 ) + + call stdlib_wtrmm( 'L', 'L', 'C', 'N', n2, n1, cone, a( n1*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_wlauum( 'L', n2, a( n1*n2 ), n2, info ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_wlauum( 'L', k, a( 1 ), n+1, info ) + call stdlib_wherk( 'L', 'C', k, k, one, a( k+1 ), n+1, one,a( 1 ), n+1 ) + + call stdlib_wtrmm( 'L', 'U', 'N', 'N', k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 ) + + call stdlib_wlauum( 'U', k, a( 0 ), n+1, info ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_wlauum( 'L', k, a( k+1 ), n+1, info ) + call stdlib_wherk( 'L', 'N', k, k, one, a( 0 ), n+1, one,a( k+1 ), n+1 ) + + call stdlib_wtrmm( 'R', 'U', 'C', 'N', k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + call stdlib_wlauum( 'U', k, a( k ), n+1, info ) + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1), + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_wlauum( 'U', k, a( k ), k, info ) + call stdlib_wherk( 'U', 'N', k, k, one, a( k*( k+1 ) ), k, one,a( k ), k ) + + call stdlib_wtrmm( 'R', 'L', 'N', 'N', k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + call stdlib_wlauum( 'L', k, a( 0 ), k, info ) + else + ! srpa for upper, transpose, and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0), + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_wlauum( 'U', k, a( k*( k+1 ) ), k, info ) + call stdlib_wherk( 'U', 'C', k, k, one, a( 0 ), k, one,a( k*( k+1 ) ), k ) + + call stdlib_wtrmm( 'L', 'L', 'C', 'N', k, k, cone, a( k*k ), k,a( 0 ), k ) + + call stdlib_wlauum( 'L', k, a( k*k ), k, info ) + end if + end if + end if + return + end subroutine stdlib_wpftri + + !> ZPFTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by ZPFTRF. + + pure subroutine stdlib_wpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: a(0:*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, normaltransr + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb ZPOCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite matrix using the + !> Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wpocon( uplo, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(qp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_wpoequ( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: amax, scond + ! Array Arguments + real(qp), intent(out) :: s(*) + complex(qp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( lda ZPOEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from ZPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_wpoequb( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: amax, scond + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + real(qp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(qp) :: smin, base, tmp + ! Intrinsic Functions + intrinsic :: max,min,sqrt,log,int,real,aimag + ! Executable Statements + ! test the input parameters. + ! positive definite only performs 1 pass of equilibration. + info = 0 + if( n<0 ) then + info = -1 + else if( lda ZPORFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite, + !> and provides error bounds and backward error estimates for the + !> solution. + + pure subroutine stdlib_wporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wpotrs( uplo, n, 1, af, ldaf, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_wpotrs( uplo, n, 1, af, ldaf, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wpotrs( uplo, n, 1, af, ldaf, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wporfs + + !> ZPOSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H* U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_wposv( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZPOSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wposvx( fact, uplo, n, nrhs, a, lda, af, ldaf, equed,s, b, ldb, x, ldx, & + rcond, ferr, berr, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(inout) :: s(*) + complex(qp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(qp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanhe( '1', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_wpocon( uplo, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wpotrs( uplo, n, nrhs, af, ldaf, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_wporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPOTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wpotf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wpotrf( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code. + call stdlib_wpotrf2( uplo, n, a, lda, info ) + else + ! use blocked code. + if( upper ) then + ! compute the cholesky factorization a = u**h *u. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_wherk( 'UPPER', 'CONJUGATE TRANSPOSE', jb, j-1,-one, a( 1, j ), & + lda, one, a( j, j ), lda ) + call stdlib_wpotrf2( 'UPPER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block row. + call stdlib_wgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', jb,n-j-jb+1, j-1,& + -cone, a( 1, j ), lda,a( 1, j+jb ), lda, cone, a( j, j+jb ),lda ) + call stdlib_wtrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', jb, & + n-j-jb+1, cone, a( j, j ),lda, a( j, j+jb ), lda ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_wherk( 'LOWER', 'NO TRANSPOSE', jb, j-1, -one,a( j, 1 ), lda, one,& + a( j, j ), lda ) + call stdlib_wpotrf2( 'LOWER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block column. + call stdlib_wgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j-jb+1, jb, j-1,& + -cone, a( j+jb, 1 ),lda, a( j, 1 ), lda, cone, a( j+jb, j ),lda ) + call stdlib_wtrsm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', n-j-& + jb+1, jb, cone, a( j, j ),lda, a( j+jb, j ), lda ) + end if + end do + end if + end if + go to 40 + 30 continue + info = info + j - 1 + 40 continue + return + end subroutine stdlib_wpotrf + + !> ZPOTRF2: computes the Cholesky factorization of a Hermitian + !> positive definite matrix A using the recursive algorithm. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = n/2 + !> [ A21 | A22 ] n2 = n-n1 + !> The subroutine calls itself to factor A11. Update and scale A21 + !> or A12, update A22 then call itself to factor A22. + + pure recursive subroutine stdlib_wpotrf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: n1, n2, iinfo + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: max,real,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPOTRF. + + pure subroutine stdlib_wpotri( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 )return + ! form inv(u) * inv(u)**h or inv(l)**h * inv(l). + call stdlib_wlauum( uplo, n, a, lda, info ) + return + end subroutine stdlib_wpotri + + !> ZPOTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H * U or A = L * L**H computed by ZPOTRF. + + pure subroutine stdlib_wpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZPPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite packed matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> ZPPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wppcon( uplo, n, ap, anorm, rcond, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(qp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm ZPPEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_wppequ( uplo, n, ap, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(out) :: amax, scond + ! Array Arguments + real(qp), intent(out) :: s(*) + complex(qp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, jj + real(qp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPEQU', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + scond = one + amax = zero + return + end if + ! initialize smin and amax. + s( 1 ) = real( ap( 1 ),KIND=qp) + smin = s( 1 ) + amax = s( 1 ) + if( upper ) then + ! uplo = 'u': upper triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + i + s( i ) = real( ap( jj ),KIND=qp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + else + ! uplo = 'l': lower triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + n - i + 2 + s( i ) = real( ap( jj ),KIND=qp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + end if + if( smin<=zero ) then + ! find the first non-positive diagonal element and return. + do i = 1, n + if( s( i )<=zero ) then + info = i + return + end if + end do + else + ! set the scale factors to the reciprocals + ! of the diagonal elements. + do i = 1, n + s( i ) = one / sqrt( s( i ) ) + end do + ! compute scond = min(s(i)) / max(s(i)) + scond = sqrt( smin ) / sqrt( amax ) + end if + return + end subroutine stdlib_wppequ + + !> ZPPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_wpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wpptrs( uplo, n, 1, afp, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_wpptrs( uplo, n, 1, afp, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wpptrs( uplo, n, 1, afp, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wpprfs + + !> ZPPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_wppsv( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(qp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZPPSVX: uses the Cholesky factorization A = U**H * U or A = L * L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wppsvx( fact, uplo, n, nrhs, ap, afp, equed, s, b, ldb,x, ldx, rcond, ferr,& + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(inout) :: s(*) + complex(qp), intent(inout) :: afp(*), ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(qp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_qlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( stdlib_lsame( fact, 'F' ) .and. .not.( rcequ .or. stdlib_lsame( equed, 'N' ) )& + ) then + info = -7 + else + if( rcequ ) then + smin = bignum + smax = zero + do j = 1, n + smin = min( smin, s( j ) ) + smax = max( smax, s( j ) ) + end do + if( smin<=zero ) then + info = -8 + else if( n>0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_wppcon( uplo, n, afp, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wpptrs( uplo, n, nrhs, afp, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_wpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr, berr,work, rwork, & + info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPPTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_wpptrf( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( upper ) then + ! compute the cholesky factorization a = u**h * u. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + ! compute elements 1:j-1 of column j. + if( j>1 )call stdlib_wtpsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',j-1, ap, & + ap( jc ), 1 ) + ! compute u(j,j) and test for non-positive-definiteness. + ajj = real( ap( jj ),KIND=qp) - real( stdlib_wdotc( j-1,ap( jc ), 1, ap( jc ), 1 & + ),KIND=qp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ap( jj ) = sqrt( ajj ) + end do + else + ! compute the cholesky factorization a = l * l**h. + jj = 1 + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ap( jj ),KIND=qp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ap( jj ) = ajj + ! compute elements j+1:n of column j and update the trailing + ! submatrix. + if( j ZPPTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPPTRF. + + pure subroutine stdlib_wpptri( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj, jjn + real(qp) :: ajj + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_wtptri( uplo, 'NON-UNIT', n, ap, info ) + if( info>0 )return + if( upper ) then + ! compute the product inv(u) * inv(u)**h. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + if( j>1 )call stdlib_whpr( 'UPPER', j-1, one, ap( jc ), 1, ap ) + ajj = real( ap( jj ),KIND=qp) + call stdlib_wdscal( j, ajj, ap( jc ), 1 ) + end do + else + ! compute the product inv(l)**h * inv(l). + jj = 1 + do j = 1, n + jjn = jj + n - j + 1 + ap( jj ) = real( stdlib_wdotc( n-j+1, ap( jj ), 1, ap( jj ), 1 ),KIND=qp) + if( j ZPPTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**H * U or A = L * L**H computed by ZPPTRF. + + pure subroutine stdlib_wpptrs( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZPSTF2: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 2 BLAS. + + pure subroutine stdlib_wpstf2( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: ztemp + real(qp) :: ajj, dstop, dtemp + integer(ilp) :: i, itemp, j, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,conjg,max,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),KIND=qp) + + end if + work( n+i ) = real( a( i, i ),KIND=qp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_qisnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_wswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvt1 ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),KIND=qp) + + end if + work( n+i ) = real( a( i, i ),KIND=qp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_qisnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_wswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt ZPSTRF: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 3 BLAS. + + pure subroutine stdlib_wpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(qp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + real(qp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(qp) :: ztemp + real(qp) :: ajj, dstop, dtemp + integer(ilp) :: i, itemp, j, jb, k, nb, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,conjg,max,min,sqrt,maxloc + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_wpstf2( uplo, n, a( 1, 1 ), lda, piv, rank, tol, work,info ) + go to 230 + else + ! initialize piv + do i = 1, n + piv( i ) = i + end do + ! compute stopping value + do i = 1, n + work( i ) = real( a( i, i ),KIND=qp) + end do + pvt = maxloc( work( 1:n ), 1 ) + ajj = real( a( pvt, pvt ),KIND=qp) + if( ajj<=zero.or.stdlib_qisnan( ajj ) ) then + rank = 0 + info = 1 + go to 230 + end if + ! compute stopping value if not supplied + if( tolk ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),& + KIND=qp) + end if + work( n+i ) = real( a( i, i ),KIND=qp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_qisnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_wswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvtk ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),& + KIND=qp) + end if + work( n+i ) = real( a( i, i ),KIND=qp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_qisnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_wswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt ZPTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite tridiagonal matrix + !> using the factorization A = L*D*L**H or A = U**H*D*U computed by + !> ZPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wptcon( n, d, e, anorm, rcond, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(in) :: d(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ix + real(qp) :: ainvnm + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input arguments. + info = 0 + if( n<0 ) then + info = -1 + else if( anorm ZPTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using DPTTRF and then calling ZBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band positive definite Hermitian matrix + !> can also be found if ZHETRD, ZHPTRD, or ZHBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to + !> tridiagonal form, however, may preclude the possibility of obtaining + !> high relative accuracy in the small eigenvalues of the original + !> matrix, if these eigenvalues range over many orders of magnitude.) + + pure subroutine stdlib_wpteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: z(ldz,*) + ! ==================================================================== + + ! Local Arrays + complex(qp) :: c(1,1), vt(1,1) + ! Local Scalars + integer(ilp) :: i, icompz, nru + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldz0 )z( 1, 1 ) = cone + return + end if + if( icompz==2 )call stdlib_wlaset( 'FULL', n, n, czero, cone, z, ldz ) + ! call stdlib_qpttrf to factor the matrix. + call stdlib_qpttrf( n, d, e, info ) + if( info/=0 )return + do i = 1, n + d( i ) = sqrt( d( i ) ) + end do + do i = 1, n - 1 + e( i ) = e( i )*d( i ) + end do + ! call stdlib_wbdsqr to compute the singular values/vectors of the + ! bidiagonal factor. + if( icompz>0 ) then + nru = n + else + nru = 0 + end if + call stdlib_wbdsqr( 'LOWER', n, 0, nru, 0, d, e, vt, 1, z, ldz, c, 1,work, info ) + + ! square the singular values. + if( info==0 ) then + do i = 1, n + d( i ) = d( i )*d( i ) + end do + else + info = n + info + end if + return + end subroutine stdlib_wpteqr + + !> ZPTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + + pure subroutine stdlib_wptrfs( uplo, n, nrhs, d, e, df, ef, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(in) :: d(*), df(*) + complex(qp), intent(in) :: b(ldb,*), e(*), ef(*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ix, j, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin + complex(qp) :: bi, cx, dx, ex, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wpttrs( uplo, n, 1, df, ef, work, n, info ) + call stdlib_waxpy( n, cmplx( one,KIND=qp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + ix = stdlib_iqamax( n, rwork, 1 ) + ferr( j ) = rwork( ix ) + ! estimate the norm of inv(a). + ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by + ! m(i,j) = abs(a(i,j)), i = j, + ! m(i,j) = -abs(a(i,j)), i .ne. j, + ! and e = [ 1, 1, ..., 1 ]**t. note m(a) = m(l)*d*m(l)**h. + ! solve m(l) * x = e. + rwork( 1 ) = one + do i = 2, n + rwork( i ) = one + rwork( i-1 )*abs( ef( i-1 ) ) + end do + ! solve d * m(l)**h * x = b. + rwork( n ) = rwork( n ) / df( n ) + do i = n - 1, 1, -1 + rwork( i ) = rwork( i ) / df( i ) +rwork( i+1 )*abs( ef( i ) ) + end do + ! compute norm(inv(a)) = max(x(i)), 1<=i<=n. + ix = stdlib_iqamax( n, rwork, 1 ) + ferr( j ) = ferr( j )*abs( rwork( ix ) ) + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_100 + return + end subroutine stdlib_wptrfs + + !> ZPTSV: computes the solution to a complex system of linear equations + !> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**H, and the factored form of A is then + !> used to solve the system of equations. + + pure subroutine stdlib_wptsv( n, nrhs, d, e, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(qp), intent(inout) :: d(*) + complex(qp), intent(inout) :: b(ldb,*), e(*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb ZPTSVX: uses the factorization A = L*D*L**H to compute the solution + !> to a complex system of linear equations A*X = B, where A is an + !> N-by-N Hermitian positive definite tridiagonal matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_wptsvx( fact, n, nrhs, d, e, df, ef, b, ldb, x, ldx,rcond, ferr, berr,& + work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + real(qp), intent(in) :: d(*) + real(qp), intent(inout) :: df(*) + complex(qp), intent(in) :: b(ldb,*), e(*) + complex(qp), intent(inout) :: ef(*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb1 )call stdlib_wcopy( n-1, e, 1, ef, 1 ) + call stdlib_wpttrf( n, df, ef, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlanht( '1', n, d, e ) + ! compute the reciprocal of the condition number of a. + call stdlib_wptcon( n, df, ef, anorm, rcond, rwork, info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wpttrs( 'LOWER', n, nrhs, df, ef, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_wptrfs( 'LOWER', n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPTTRF: computes the L*D*L**H factorization of a complex Hermitian + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**H *D*U. + + pure subroutine stdlib_wpttrf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(qp), intent(inout) :: d(*) + complex(qp), intent(inout) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i4 + real(qp) :: eii, eir, f, g + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag,mod + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'ZPTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! compute the l*d*l**h (or u**h *d*u) factorization of a. + i4 = mod( n-1, 4 ) + do i = 1, i4 + if( d( i )<=zero ) then + info = i + go to 30 + end if + eir = real( e( i ),KIND=qp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=qp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + end do + loop_20: do i = i4 + 1, n - 4, 4 + ! drop out of the loop if d(i) <= 0: the matrix is not positive + ! definite. + if( d( i )<=zero ) then + info = i + go to 30 + end if + ! solve for e(i) and d(i+1). + eir = real( e( i ),KIND=qp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=qp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + if( d( i+1 )<=zero ) then + info = i + 1 + go to 30 + end if + ! solve for e(i+1) and d(i+2). + eir = real( e( i+1 ),KIND=qp) + eii = aimag( e( i+1 ) ) + f = eir / d( i+1 ) + g = eii / d( i+1 ) + e( i+1 ) = cmplx( f, g,KIND=qp) + d( i+2 ) = d( i+2 ) - f*eir - g*eii + if( d( i+2 )<=zero ) then + info = i + 2 + go to 30 + end if + ! solve for e(i+2) and d(i+3). + eir = real( e( i+2 ),KIND=qp) + eii = aimag( e( i+2 ) ) + f = eir / d( i+2 ) + g = eii / d( i+2 ) + e( i+2 ) = cmplx( f, g,KIND=qp) + d( i+3 ) = d( i+3 ) - f*eir - g*eii + if( d( i+3 )<=zero ) then + info = i + 3 + go to 30 + end if + ! solve for e(i+3) and d(i+4). + eir = real( e( i+3 ),KIND=qp) + eii = aimag( e( i+3 ) ) + f = eir / d( i+3 ) + g = eii / d( i+3 ) + e( i+3 ) = cmplx( f, g,KIND=qp) + d( i+4 ) = d( i+4 ) - f*eir - g*eii + end do loop_20 + ! check d(n) for positive definiteness. + if( d( n )<=zero )info = n + 30 continue + return + end subroutine stdlib_wpttrf + + !> ZPTTRS: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H *D* U or A = L*D*L**H computed by ZPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_wpttrs( uplo, n, nrhs, d, e, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(qp), intent(in) :: d(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: iuplo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments. + info = 0 + upper = ( uplo=='U' .or. uplo=='U' ) + if( .not.upper .and. .not.( uplo=='L' .or. uplo=='L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_wptts2( iuplo, n, nrhs, d, e, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_wptts2( iuplo, n, jb, d, e, b( 1, j ), ldb ) + end do + end if + return + end subroutine stdlib_wpttrs + + !> ZPTTS2: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H *D*U or A = L*D*L**H computed by ZPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_wptts2( iuplo, n, nrhs, d, e, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: iuplo, ldb, n, nrhs + ! Array Arguments + real(qp), intent(in) :: d(*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + if( n==1 )call stdlib_wdscal( nrhs, 1._qp / d( 1 ), b, ldb ) + return + end if + if( iuplo==1 ) then + ! solve a * x = b using the factorization a = u**h *d*u, + ! overwriting each right hand side vector with its solution. + if( nrhs<=2 ) then + j = 1 + 10 continue + ! solve u**h * x = b. + do i = 2, n + b( i, j ) = b( i, j ) - b( i-1, j )*conjg( e( i-1 ) ) + end do + ! solve d * u * x = b. + do i = 1, n + b( i, j ) = b( i, j ) / d( i ) + end do + do i = n - 1, 1, -1 + b( i, j ) = b( i, j ) - b( i+1, j )*e( i ) + end do + if( j ZROT: applies a plane rotation, where the cos (C) is real and the + !> sin (S) is complex, and the vectors CX and CY are complex. + + pure subroutine stdlib_wrot( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(qp), intent(in) :: c + complex(qp), intent(in) :: s + ! Array Arguments + complex(qp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(qp) :: stemp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + stemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - conjg( s )*cx( ix ) + cx( ix ) = stemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + stemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - conjg( s )*cx( i ) + cx( i ) = stemp + end do + return + end subroutine stdlib_wrot + + !> ZSPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric packed matrix A using the + !> factorization A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wspcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_wsptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_wspcon + + !> ZSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_wspmv( uplo, n, alpha, ap, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, n + complex(qp), intent(in) :: alpha, beta + ! Array Arguments + complex(qp), intent(in) :: ap(*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + complex(qp) :: temp1, temp2 + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 6 + else if( incy==0 ) then + info = 9 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPMV ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( ( alpha==czero ) .and. ( beta==cone ) ) )return + ! set up the start points in x and y. + if( incx>0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when ap contains the upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + k = kk + do i = 1, j - 1 + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + temp1*ap( kk+j-1 ) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do k = kk, kk + j - 2 + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*ap( kk+j-1 ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*ap( kk ) + k = kk + 1 + do i = j + 1, n + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + alpha*temp2 + kk = kk + ( n-j+1 ) + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*ap( kk ) + ix = jx + iy = jy + do k = kk + 1, kk + n - j + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + ( n-j+1 ) + end do + end if + end if + return + end subroutine stdlib_wspmv + + !> ZSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_wspr( uplo, n, alpha, x, incx, ap ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, n + complex(qp), intent(in) :: alpha + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + complex(qp) :: temp + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPR ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( alpha==czero ) )return + ! set the start point in x if the increment is not unity. + if( incx<=0 ) then + kx = 1 - ( n-1 )*incx + else if( incx/=1 ) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form a when upper triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + k = kk + do i = 1, j - 1 + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( j )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + kk = kk + j + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ix = kx + do k = kk, kk + j - 2 + ap( k ) = ap( k ) + x( ix )*temp + ix = ix + incx + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( jx )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + ap( kk ) = ap( kk ) + temp*x( j ) + k = kk + 1 + do i = j + 1, n + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + else + ap( kk ) = ap( kk ) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ap( kk ) = ap( kk ) + temp*x( jx ) + ix = jx + do k = kk + 1, kk + n - j + ix = ix + incx + ap( k ) = ap( k ) + x( ix )*temp + end do + else + ap( kk ) = ap( kk ) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_wspr + + !> ZSPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_wsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_wsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wsprfs + + !> ZSPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is symmetric and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_wspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZSPSVX: uses the diagonal pivoting factorization A = U*D*U**T or + !> A = L*D*L**T to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wspsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(inout) :: afp(*) + complex(qp), intent(in) :: ap(*), b(ldb,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlansp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_wspcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wsptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_wsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZSPTRF: computes the factorization of a complex symmetric matrix A + !> stored in packed format using the Bunch-Kaufman diagonal pivoting + !> method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_wsptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(qp) :: absakk, alpha, colmax, rowmax + complex(qp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**t using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc+k-1 ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_iwamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_iwamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc+imax-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_wswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = ap( knc+j-1 ) + ap( knc+j-1 ) = ap( kx ) + ap( kx ) = t + end do + t = ap( knc+kk-1 ) + ap( knc+kk-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = t + if( kstep==2 ) then + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / ap( kc+k-1 ) + call stdlib_wspr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_wscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = ap( k-1+( k-1 )*k / 2 ) + d22 = ap( k-1+( k-2 )*( k-1 ) / 2 ) / d12 + d11 = ap( k+( k-1 )*k / 2 ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-ap( j+( k-1 )*k / 2 ) ) + + wk = d12*( d22*ap( j+( k-1 )*k / 2 )-ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *wk -ap( i+( k-2 )*( k-1 ) / 2 )*wkm1 + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZSPTRI: computes the inverse of a complex symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSPTRF. + + pure subroutine stdlib_wsptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + complex(qp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = cone / ap( kc+k-1 ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_wspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_wdotu( k-1, work, 1, ap( kc ), 1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = ap( kcnext+k-1 ) + ak = ap( kc+k-1 ) / t + akp1 = ap( kcnext+k ) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-cone ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_wspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_wdotu( k-1, work, 1, ap( kc ), 1 ) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_wdotu( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_wcopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_wspmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -stdlib_wdotu( k-1, work, 1, ap( kcnext ), 1 ) + + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_wswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = ap( kc+j-1 ) + ap( kc+j-1 ) = ap( kx ) + ap( kx ) = temp + end do + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = cone / ap( kc ) + ! compute column k of the inverse. + if( k ZSPTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. + + pure subroutine stdlib_wsptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_wscal( nrhs, cone / ap( kc+k-1 ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_wgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_wgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,ap( kc+k ), 1, cone, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZSTEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLAED3 for details. + + pure subroutine stdlib_wstedc( compz, n, d, e, z, ldz, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: finish, i, icompz, ii, j, k, lgn, liwmin, ll, lrwmin, lwmin, m, smlsiz,& + start + real(qp) :: eps, orgnrm, p, tiny + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max,mod,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or.( icompz>0 .and. ldztiny ) then + finish = finish + 1 + go to 40 + end if + end if + ! (sub) problem determined. compute its size and solve it. + m = finish - start + 1 + if( m>smlsiz ) then + ! scale. + orgnrm = stdlib_qlanst( 'M', m, d( start ), e( start ) ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, m, 1, d( start ), m,info ) + call stdlib_qlascl( 'G', 0, 0, orgnrm, one, m-1, 1, e( start ),m-1, info ) + + call stdlib_wlaed0( n, m, d( start ), e( start ), z( 1, start ),ldz, work, n, & + rwork, iwork, info ) + if( info>0 ) then + info = ( info / ( m+1 )+start-1 )*( n+1 ) +mod( info, ( m+1 ) ) + start - & + 1 + go to 70 + end if + ! scale back. + call stdlib_qlascl( 'G', 0, 0, one, orgnrm, m, 1, d( start ), m,info ) + else + call stdlib_qsteqr( 'I', m, d( start ), e( start ), rwork, m,rwork( m*m+1 ), & + info ) + call stdlib_wlacrm( n, m, z( 1, start ), ldz, rwork, m, work, n,rwork( m*m+1 )& + ) + call stdlib_wlacpy( 'A', n, m, work, n, z( 1, start ), ldz ) + if( info>0 ) then + info = start*( n+1 ) + finish + go to 70 + end if + end if + start = finish + 1 + go to 30 + end if + ! endwhile + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

ZSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> ZSTEGR is a compatibility wrapper around the improved ZSTEMR routine. + !> See ZSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : ZSTEGR and ZSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_wstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*) + complex(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_wstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_wstegr + + !> ZSTEIN: computes the eigenvectors of a real symmetric tridiagonal + !> matrix T corresponding to specified eigenvalues, using inverse + !> iteration. + !> The maximum number of iterations allowed for each eigenvector is + !> specified by an internal parameter MAXITS (currently set to 5). + !> Although the eigenvectors are real, they are stored in a complex + !> array, which may be passed to ZUNMTR or ZUPMTR for back + !> transformation to the eigenvectors of a complex Hermitian matrix + !> which was reduced to tridiagonal form. + + pure subroutine stdlib_wstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, m, n + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), isplit(*) + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(qp), intent(in) :: d(*), e(*), w(*) + real(qp), intent(out) :: work(*) + complex(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: odm3 = 1.0e-3_qp + real(qp), parameter :: odm1 = 1.0e-1_qp + integer(ilp), parameter :: maxits = 5 + integer(ilp), parameter :: extra = 2 + + + + ! Local Scalars + integer(ilp) :: b1, blksiz, bn, gpind, i, iinfo, indrv1, indrv2, indrv3, indrv4, & + indrv5, its, j, j1, jblk, jmax, jr, nblk, nrmchk + real(qp) :: dtpcrt, eps, eps1, nrm, onenrm, ortol, pertol, scl, sep, tol, xj, xjm, & + ztr + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + do i = 1, m + ifail( i ) = 0 + end do + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -4 + else if( ldz1 ) then + eps1 = abs( eps*xj ) + pertol = ten*eps1 + sep = xj - xjm + if( sepmaxits )go to 120 + ! normalize and scale the righthand side vector pb. + jmax = stdlib_iqamax( blksiz, work( indrv1+1 ), 1 ) + scl = blksiz*onenrm*max( eps,abs( work( indrv4+blksiz ) ) ) /abs( work( indrv1+& + jmax ) ) + call stdlib_qscal( blksiz, scl, work( indrv1+1 ), 1 ) + ! solve the system lu = pb. + call stdlib_qlagts( -1, blksiz, work( indrv4+1 ), work( indrv2+2 ),work( indrv3+& + 1 ), work( indrv5+1 ), iwork,work( indrv1+1 ), tol, iinfo ) + ! reorthogonalize by modified gram-schmidt if eigenvalues are + ! close enough. + if( jblk==1 )go to 110 + if( abs( xj-xjm )>ortol )gpind = j + if( gpind/=j ) then + do i = gpind, j - 1 + ztr = zero + do jr = 1, blksiz + ztr = ztr + work( indrv1+jr )*real( z( b1-1+jr, i ),KIND=qp) + end do + do jr = 1, blksiz + work( indrv1+jr ) = work( indrv1+jr ) -ztr*real( z( b1-1+jr, i ),& + KIND=qp) + end do + end do + end if + ! check the infinity norm of the iterate. + 110 continue + jmax = stdlib_iqamax( blksiz, work( indrv1+1 ), 1 ) + nrm = abs( work( indrv1+jmax ) ) + ! continue for additional iterations after norm reaches + ! stopping criterion. + if( nrm ZSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.ZSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + !> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to + !> real symmetric tridiagonal form. + !> (Any complex Hermitean tridiagonal matrix has real values on its diagonal + !> and potentially complex numbers on its off-diagonals. By applying a + !> similarity transform with an appropriate diagonal matrix + !> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean + !> matrix can be transformed into a real symmetric matrix and complex + !> arithmetic can be entirely avoided.) + !> While the eigenvectors of the real symmetric tridiagonal matrix are real, + !> the eigenvectors of original complex Hermitean matrix have complex entries + !> in general. + !> Since LAPACK drivers overwrite the matrix data with the eigenvectors, + !> ZSTEMR accepts complex workspace to facilitate interoperability + !> with ZUNMTR or ZUPMTR. + + pure subroutine stdlib_wstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: w(*), work(*) + complex(qp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: minrgp = 1.0e-3_qp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(qp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_qstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_qlarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_wlarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_qlarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_qlae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_qlaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_qlarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_qlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_qscal( n, scale, d, 1 ) + call stdlib_qscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_qlarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_qlarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_qcopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_qlarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_qlarre computes the eigenvalues to less than full precision. + ! stdlib_wlarrv will refine the eigenvalue approximations, and we only + ! need less accurate initial bisection in stdlib_qlarre. + ! note: these settings do only affect the subset case and stdlib_qlarre + rtol1 = sqrt(eps) + rtol2 = max( sqrt(eps)*5.0e-3_qp, four * eps ) + endif + call stdlib_qlarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_qlarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_wlarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_qlarre computes eigenvalues of the (shifted) root representation + ! stdlib_wlarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_qlarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_qlasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) ZSTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the implicit QL or QR method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + !> matrix to tridiagonal form. + + pure subroutine stdlib_wsteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(qp), intent(inout) :: d(*), e(*) + real(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + + ! Local Scalars + integer(ilp) :: i, icompz, ii, iscale, j, jtot, k, l, l1, lend, lendm1, lendp1, lendsv,& + lm1, lsv, m, mm, mm1, nm1, nmaxit + real(qp) :: anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, s, safmax, safmin, ssfmax, & + ssfmin, tst + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldzn )go to 160 + if( l1>1 )e( l1-1 ) = zero + if( l1<=nm1 ) then + do m = l1, nm1 + tst = abs( e( m ) ) + if( tst==zero )go to 30 + if( tst<=( sqrt( abs( d( m ) ) )*sqrt( abs( d( m+1 ) ) ) )*eps ) then + e( m ) = zero + go to 30 + end if + end do + end if + m = n + 30 continue + l = l1 + lsv = l + lend = m + lendsv = lend + l1 = m + 1 + if( lend==l )go to 10 + ! scale submatrix in rows and columns l to lend + anorm = stdlib_qlanst( 'I', lend-l+1, d( l ), e( l ) ) + iscale = 0 + if( anorm==zero )go to 10 + if( anorm>ssfmax ) then + iscale = 1 + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l+1, 1, d( l ), n,info ) + call stdlib_qlascl( 'G', 0, 0, anorm, ssfmax, lend-l, 1, e( l ), n,info ) + else if( anorml ) then + ! ql iteration + ! look for small subdiagonal element. + 40 continue + if( l/=lend ) then + lendm1 = lend - 1 + do m = l, lendm1 + tst = abs( e( m ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m+1 ) )+safmin )go to 60 + end do + end if + m = lend + 60 continue + if( m0 ) then + call stdlib_qlaev2( d( l ), e( l ), d( l+1 ), rt1, rt2, c, s ) + work( l ) = c + work( n-1+l ) = s + call stdlib_wlasr( 'R', 'V', 'B', n, 2, work( l ),work( n-1+l ), z( 1, l ), & + ldz ) + else + call stdlib_qlae2( d( l ), e( l ), d( l+1 ), rt1, rt2 ) + end if + d( l ) = rt1 + d( l+1 ) = rt2 + e( l ) = zero + l = l + 2 + if( l<=lend )go to 40 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l+1 )-p ) / ( two*e( l ) ) + r = stdlib_qlapy2( g, one ) + g = d( m ) - p + ( e( l ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + mm1 = m - 1 + do i = mm1, l, -1 + f = s*e( i ) + b = c*e( i ) + call stdlib_qlartg( g, f, c, s, r ) + if( i/=m-1 )e( i+1 ) = r + g = d( i+1 ) - p + r = ( d( i )-g )*s + two*c*b + p = s*r + d( i+1 ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = -s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = m - l + 1 + call stdlib_wlasr( 'R', 'V', 'B', n, mm, work( l ), work( n-1+l ),z( 1, l ), ldz & + ) + end if + d( l ) = d( l ) - p + e( l ) = g + go to 40 + ! eigenvalue found. + 80 continue + d( l ) = p + l = l + 1 + if( l<=lend )go to 40 + go to 140 + else + ! qr iteration + ! look for small superdiagonal element. + 90 continue + if( l/=lend ) then + lendp1 = lend + 1 + do m = l, lendp1, -1 + tst = abs( e( m-1 ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m-1 ) )+safmin )go to 110 + end do + end if + m = lend + 110 continue + if( m>lend )e( m-1 ) = zero + p = d( l ) + if( m==l )go to 130 + ! if remaining matrix is 2-by-2, use stdlib_qlae2 or stdlib_dlaev2 + ! to compute its eigensystem. + if( m==l-1 ) then + if( icompz>0 ) then + call stdlib_qlaev2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2, c, s ) + work( m ) = c + work( n-1+m ) = s + call stdlib_wlasr( 'R', 'V', 'F', n, 2, work( m ),work( n-1+m ), z( 1, l-1 ), & + ldz ) + else + call stdlib_qlae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) + end if + d( l-1 ) = rt1 + d( l ) = rt2 + e( l-1 ) = zero + l = l - 2 + if( l>=lend )go to 90 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l-1 )-p ) / ( two*e( l-1 ) ) + r = stdlib_qlapy2( g, one ) + g = d( m ) - p + ( e( l-1 ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + lm1 = l - 1 + do i = m, lm1 + f = s*e( i ) + b = c*e( i ) + call stdlib_qlartg( g, f, c, s, r ) + if( i/=m )e( i-1 ) = r + g = d( i ) - p + r = ( d( i+1 )-g )*s + two*c*b + p = s*r + d( i ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = l - m + 1 + call stdlib_wlasr( 'R', 'V', 'F', n, mm, work( m ), work( n-1+m ),z( 1, m ), ldz & + ) + end if + d( l ) = d( l ) - p + e( lm1 ) = g + go to 90 + ! eigenvalue found. + 130 continue + d( l ) = p + l = l - 1 + if( l>=lend )go to 90 + go to 140 + end if + ! undo scaling if necessary + 140 continue + if( iscale==1 ) then + call stdlib_qlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_qlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + else if( iscale==2 ) then + call stdlib_qlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_qlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + end if + ! check for no convergence to an eigenvalue after a total + ! of n*maxit iterations. + if( jtot==nmaxit ) then + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + return + end if + go to 10 + ! order eigenvalues and eigenvectors. + 160 continue + if( icompz==0 ) then + ! use quick sort + call stdlib_qlasrt( 'I', n, d, info ) + else + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

ZSYCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wsycon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_wsytrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_wsycon + + !> ZSYCON_ROOK: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_wsycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(in) :: anorm + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(qp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==czero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_wsytrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_wsycon_rook + + !> ZSYCONV: converts A given by ZHETRF into L and D or vice-versa. + !> Get nondiagonal elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_wsyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + complex(qp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=czero + a(i-1,i)=czero + i=i-1 + else + e(i)=czero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=czero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_wsyconv + + !> If parameter WAY = 'C': + !> ZSYCONVF: converts the factorization output format used in + !> ZSYTRF provided on entry in parameter A into the factorization + !> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in ZSYTRF into + !> the format used in ZSYTRF_RK (or ZSYTRF_BK). + !> If parameter WAY = 'R': + !> ZSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in ZSYTRF_RK + !> (or ZSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in ZSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in ZSYTRF_RK + !> (or ZSYTRF_BK) into the format used in ZSYTRF. + !> ZSYCONVF can also convert in Hermitian matrix case, i.e. between + !> formats used in ZHETRF and ZHETRF_RK (or ZHETRF_BK). + + pure subroutine stdlib_wsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_wswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_wswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_wswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_wswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is cone interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_wsyconvf + + !> If parameter WAY = 'C': + !> ZSYCONVF_ROOK: converts the factorization output format used in + !> ZSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for ZSYTRF_ROOK and + !> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> ZSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in ZSYTRF_RK + !> (or ZSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in ZSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for ZSYTRF_ROOK and + !> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. + !> ZSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between + !> formats used in ZHETRF_ROOK and ZHETRF_RK (or ZHETRF_BK). + + pure subroutine stdlib_wsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_wswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_wswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_wswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_wswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_wswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_wswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_wsyconvf_rook + + !> ZSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_wsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + real(qp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(qp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(qp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log,max,min,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'ZSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_qp / s( j ) + end do + tol = one / sqrt( 2.0_qp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + s( i ) * real( work( i ),KIND=qp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_wlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( real( work( i ),KIND=qp) - t*si ) + c0 = -(t*si)*si + 2 * real( work( i ),KIND=qp) * si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + real( work( i ),KIND=qp) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_qlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_qlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_wsyequb + + !> ZSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_wsymv( uplo, n, alpha, a, lda, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, lda, n + complex(qp), intent(in) :: alpha, beta + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), x(*) + complex(qp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + complex(qp) :: temp1, temp2 + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( lda0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when a is stored in upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + do i = 1, j - 1 + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + temp1*a( j, j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do i = 1, j - 1 + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*a( j, j ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*a( j, j ) + do i = j + 1, n + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*a( j, j ) + ix = jx + iy = jy + do i = j + 1, n + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_wsymv + + !> ZSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_wsyr( uplo, n, alpha, x, incx, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, lda, n + complex(qp), intent(in) :: alpha + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, kx + complex(qp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + else if( lda ZSYRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_wsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_wsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_waxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_wsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_wsyrfs + + !> ZSYSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_wsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSV computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_wsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> ZSYTRF_RK is called to compute the factorization of a complex + !> symmetric matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine ZSYTRS_3. + + pure subroutine stdlib_wsysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSV_ROOK: computes the solution to a complex system of linear + !> equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> ZSYTRF_ROOK is called to compute the factorization of a complex + !> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling ZSYTRS_ROOK. + + pure subroutine stdlib_wsysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_wsysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(qp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: af(ldaf,*) + complex(qp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(qp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_wlansy( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_wsycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_wlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_wsytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_wsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_wsyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(qp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_wswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_wswap( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_wsyswapr + + !> ZSYTF2: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wsytf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(qp) :: absakk, alpha, colmax, rowmax + complex(qp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero .or. stdlib_qisnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = imax + stdlib_iwamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + call stdlib_wswap( kk-kp-1, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / a( k, k ) + call stdlib_wsyr( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*a( j, k-1 )-a( j, k ) ) + wk = d12*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*wk -a( i, k-1 )*wkm1 + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_iwamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZSYTF2_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_wsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(qp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(qp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )1 )call stdlib_wswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_wswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_wswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_wsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )(k+1) )call stdlib_wswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_wswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_wswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_wswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_wsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_wscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZSYTF2_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_wsytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: sevten = 17.0e+0_qp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(qp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(qp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=qp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_iwamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_iwamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))1 )call stdlib_wswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_wswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_wswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_wswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_wsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_wscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))(k+1) )call stdlib_wswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_wswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_wsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_wscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_wsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZSYTRF: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wsytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlasyf( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_wsytf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_wlasyf; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlasyf( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_wsytf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_wsytrf + + !> ZSYTRF_AA: computes the factorization of a complex symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a complex symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'ZSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_wlasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_wswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = cone + call stdlib_wcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_wscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_wgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_wgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j1-k2, j3 ), 1,cone, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_wgemm + call stdlib_wgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-cone, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,cone, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_wcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_wcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_wlasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_wlasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_wswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = cone + call stdlib_wcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_wscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_wgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_wgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_wgemm + call stdlib_wgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-cone, & + work( j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda ) + + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_wcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_wsytrf_aa + + !> ZSYTRF_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_wsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_wsytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_wsytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_wswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wsytrf_rk + + !> ZSYTRF_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_wsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_wlasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_wsytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_wlasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_wlasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_wsytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_wsytrf_rook + + !> ZSYTRI: computes the inverse of a complex symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> ZSYTRF. + + pure subroutine stdlib_wsytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(qp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_wdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_wdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_wdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_wcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_wdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_wswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k ZSYTRI_ROOK: computes the inverse of a complex symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by ZSYTRF_ROOK. + + pure subroutine stdlib_wsytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(qp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_wdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_wcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_wdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_wdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_wcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_wsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_wdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_wswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_wswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_wswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_wswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k ZSYTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF. + + pure subroutine stdlib_wsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_wscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_wgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,a( 1, k+1 ), 1, cone, b(& + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZSYTRS2: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF and converted by ZSYCONV. + + pure subroutine stdlib_wsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_wtrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_wscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_wtrsm('L','U','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_wswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_wtrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_wscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_wtrsm('L','L','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_wsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_wsytrs2 + + !> ZSYTRS_3: solves a system of linear equations A * X = B with a complex + !> symmetric matrix A using the factorization computed + !> by ZSYTRF_RK or ZSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_wsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*), e(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_wtrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_wscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'U', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_wscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_wtrsm('L', 'L', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_wsytrs_3 + + !> ZSYTRS_AA: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by ZSYTRF_AA. + + pure subroutine stdlib_wsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_wtrsm( 'L', 'U', 'T', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_wlacpy( 'F', 1, n, a( 1, 1 ), lda+1, work( n ), 1) + if( n>1 ) then + call stdlib_wlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_wlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_wgtsv( n, nrhs, work( 1 ), work( n ), work( 2*n ), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**t \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_wtrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_wlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_wlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_wlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_wgtsv( n, nrhs, work( 1 ), work(n), work( 2*n ), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute (l**t \ b) -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_wtrsm( 'L', 'L', 'T', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_wsytrs_aa + + !> ZSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a complex symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF_ROOK. + + pure subroutine stdlib_wsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(qp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_wgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_wscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_wswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_wgeru( k-2, nrhs,-cone, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_wgeru( k-2, nrhs,-cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 )& + , ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, & + cone, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, cone, & + b( k, 1 ), ldb ) + call stdlib_wgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 ), 1, cone,& + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_wswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_wswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZTBCON: estimates the reciprocal of the condition number of a + !> triangular band matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_wtbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab 0. + if( anorm>zero ) then + ! estimate the 1-norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_wlatbs( uplo, 'NO TRANSPOSE', diag, normin, n, kd,ab, ldab, work, & + scale, rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_wlatbs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, kd, ab, ldab,& + work, scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iwamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by ZTBTRS or some other + !> means before entering this routine. ZTBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_wtbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wtbsv( uplo, transt, diag, n, kd, ab, ldab, work,1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wtbsv( uplo, transn, diag, n, kd, ab, ldab, work,1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_wtbrfs + + !> ZTBTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by-NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_wtbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: ab(ldab,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> ZTFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**H. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_wtfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + complex(qp), intent(in) :: alpha + ! Array Arguments + complex(qp), intent(in) :: a(0:*) + complex(qp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb ZTFTRI: computes the inverse of a triangular matrix A stored in RFP + !> format. + !> This is a Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_wtftri( transr, uplo, diag, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo, diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_wtrtri( 'L', diag, n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_wtrmm( 'R', 'L', 'N', diag, n2, n1, -cone, a( 0 ),n, a( n1 ), n ) + + call stdlib_wtrtri( 'U', diag, n2, a( n ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_wtrmm( 'L', 'U', 'C', diag, n2, n1, cone, a( n ), n,a( n1 ), n ) + + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_wtrtri( 'L', diag, n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_wtrmm( 'L', 'L', 'C', diag, n1, n2, -cone, a( n2 ),n, a( 0 ), n ) + + call stdlib_wtrtri( 'U', diag, n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_wtrmm( 'R', 'U', 'N', diag, n1, n2, cone, a( n1 ),n, a( 0 ), n ) + + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_wtrtri( 'U', diag, n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_wtrmm( 'L', 'U', 'N', diag, n1, n2, -cone, a( 0 ),n1, a( n1*n1 ), & + n1 ) + call stdlib_wtrtri( 'L', diag, n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_wtrmm( 'R', 'L', 'C', diag, n1, n2, cone, a( 1 ),n1, a( n1*n1 ), & + n1 ) + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_wtrtri( 'U', diag, n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_wtrmm( 'R', 'U', 'C', diag, n2, n1, -cone,a( n2*n2 ), n2, a( 0 ), & + n2 ) + call stdlib_wtrtri( 'L', diag, n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_wtrmm( 'L', 'L', 'N', diag, n2, n1, cone,a( n1*n2 ), n2, a( 0 ), & + n2 ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_wtrtri( 'L', diag, k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_wtrmm( 'R', 'L', 'N', diag, k, k, -cone, a( 1 ),n+1, a( k+1 ), n+& + 1 ) + call stdlib_wtrtri( 'U', diag, k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_wtrmm( 'L', 'U', 'C', diag, k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 & + ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_wtrtri( 'L', diag, k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_wtrmm( 'L', 'L', 'C', diag, k, k, -cone, a( k+1 ),n+1, a( 0 ), n+& + 1 ) + call stdlib_wtrtri( 'U', diag, k, a( k ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_wtrmm( 'R', 'U', 'N', diag, k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_wtrtri( 'U', diag, k, a( k ), k, info ) + if( info>0 )return + call stdlib_wtrmm( 'L', 'U', 'N', diag, k, k, -cone, a( k ), k,a( k*( k+1 ) ),& + k ) + call stdlib_wtrtri( 'L', diag, k, a( 0 ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_wtrmm( 'R', 'L', 'C', diag, k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_wtrtri( 'U', diag, k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_wtrmm( 'R', 'U', 'C', diag, k, k, -cone,a( k*( k+1 ) ), k, a( 0 ),& + k ) + call stdlib_wtrtri( 'L', diag, k, a( k*k ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_wtrmm( 'L', 'L', 'N', diag, k, k, cone, a( k*k ), k,a( 0 ), k ) + + end if + end if + end if + return + end subroutine stdlib_wtftri + + !> ZTFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_wtfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(out) :: ap(0:*) + complex(qp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg + ! Intrinsic Functions + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = conjg( arf( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_wtfttp + + !> ZTFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_wtfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(qp), intent(out) :: a(0:lda-1,0:*) + complex(qp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + a( n2+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + a( j-n1, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + a( i, n1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + a( n2+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + a( k+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - k, k - 1 + a( j-k, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + a( i, k+1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + a( k+1+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_wtfttr + + !> ZTGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of complex matrices (S,P), where S and P are upper triangular. + !> Matrix pairs of this type are produced by the generalized Schur + !> factorization of a complex matrix pair (A,B): + !> A = Q*S*Z**H, B = Q*P*Z**H + !> as computed by ZGGHRD + ZHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal elements of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the unitary factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_wtgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: p(ldp,*), s(lds,*) + complex(qp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr + real(qp) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, & + safmin, sbeta, scale, small, temp, ulp, xmax + complex(qp) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(qp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=qp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )bignum*temp ) then + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = one + end if + suma = czero + sumb = czero + do jr = je, j - 1 + suma = suma + conjg( s( jr, j ) )*work( jr ) + sumb = sumb + conjg( p( jr, j ) )*work( jr ) + end do + sum = acoeff*suma - conjg( bcoeff )*sumb + ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) ) + ! with scaling and perturbation of the denominator + d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) ) + if( abs1( d )<=dmin )d = cmplx( dmin,KIND=qp) + if( abs1( d )=bignum*abs1( d ) ) then + temp = one / abs1( sum ) + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = temp*xmax + sum = temp*sum + end if + end if + work( j ) = stdlib_wladiv( -sum, d ) + xmax = max( xmax, abs1( work( j ) ) ) + end do loop_100 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_wgemv( 'N', n, n+1-je, cone, vl( 1, je ), ldvl,work( je ), 1, & + czero, work( n+1 ), 1 ) + isrc = 2 + ibeg = 1 + else + isrc = 1 + ibeg = je + end if + ! copy and scale eigenvector into column of vl + xmax = zero + do jr = ibeg, n + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = ibeg, n + vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + ibeg = n + 1 + end if + do jr = 1, ibeg - 1 + vl( jr, ieig ) = czero + end do + end if + end do loop_140 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + loop_250: do je = n, 1, -1 + if( ilall ) then + ilcomp = .true. + else + ilcomp = select( je ) + end if + if( ilcomp ) then + ieig = ieig - 1 + if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=qp) )& + <=safmin ) then + ! singular matrix pencil -- return unit eigenvector + do jr = 1, n + vr( jr, ieig ) = czero + end do + vr( ieig, ieig ) = cone + cycle loop_250 + end if + ! non-singular eigenvalue: + ! compute coefficients a and b in + ! ( a a - b b ) x = 0 + temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=qp) )& + *bscale, safmin ) + salpha = ( temp*s( je, je ) )*ascale + sbeta = ( temp*real( p( je, je ),KIND=qp) )*bscale + acoeff = sbeta*ascale + bcoeff = salpha*bscale + ! scale to avoid underflow + lsa = abs( sbeta )>=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )=bignum*abs1( d ) ) then + temp = one / abs1( work( j ) ) + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + work( j ) = stdlib_wladiv( -work( j ), d ) + if( j>1 ) then + ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling + if( abs1( work( j ) )>one ) then + temp = one / abs1( work( j ) ) + if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + ca = acoeff*work( j ) + cb = bcoeff*work( j ) + do jr = 1, j - 1 + work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j ) + end do + end if + end do loop_210 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_wgemv( 'N', n, je, cone, vr, ldvr, work, 1,czero, work( n+1 ), & + 1 ) + isrc = 2 + iend = n + else + isrc = 1 + iend = je + end if + ! copy and scale eigenvector into column of vr + xmax = zero + do jr = 1, iend + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = 1, iend + vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + iend = 0 + end if + do jr = iend + 1, n + vr( jr, ieig ) = czero + end do + end if + end do loop_250 + end if + return + end subroutine stdlib_wtgevc + + !> ZTGEX2: swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) + !> in an upper triangular matrix pair (A, B) by an unitary equivalence + !> transformation. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_wtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: twenty = 2.0e+1_qp + integer(ilp), parameter :: ldst = 2 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, m + real(qp) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb + complex(qp) :: cdum, f, g, sq, sz + ! Local Arrays + complex(qp) :: s(ldst,ldst), t(ldst,ldst), work(8) + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 )return + m = ldst + weak = .false. + strong = .false. + ! make a local copy of selected block in (a, b) + call stdlib_wlacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst ) + call stdlib_wlacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst ) + ! compute the threshold for testing the acceptance of swapping. + eps = stdlib_qlamch( 'P' ) + smlnum = stdlib_qlamch( 'S' ) / eps + scale = real( czero,KIND=qp) + sum = real( cone,KIND=qp) + call stdlib_wlacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_wlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_wlassq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=qp) + sum = real( cone,KIND=qp) + call stdlib_wlassq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + ! thres has been changed from + ! thresh = max( ten*eps*sa, smlnum ) + ! to + ! thresh = max( twenty*eps*sa, smlnum ) + ! on 04/01/10. + ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by + ! jim demmel and guillaume revy. see forum post 1783. + thresha = max( twenty*eps*sa, smlnum ) + threshb = max( twenty*eps*sb, smlnum ) + ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks + ! using givens rotations and perform the swap tentatively. + f = s( 2, 2 )*t( 1, 1 ) - t( 2, 2 )*s( 1, 1 ) + g = s( 2, 2 )*t( 1, 2 ) - t( 2, 2 )*s( 1, 2 ) + sa = abs( s( 2, 2 ) ) * abs( t( 1, 1 ) ) + sb = abs( s( 1, 1 ) ) * abs( t( 2, 2 ) ) + call stdlib_wlartg( g, f, cz, sz, cdum ) + sz = -sz + call stdlib_wrot( 2, s( 1, 1 ), 1, s( 1, 2 ), 1, cz, conjg( sz ) ) + call stdlib_wrot( 2, t( 1, 1 ), 1, t( 1, 2 ), 1, cz, conjg( sz ) ) + if( sa>=sb ) then + call stdlib_wlartg( s( 1, 1 ), s( 2, 1 ), cq, sq, cdum ) + else + call stdlib_wlartg( t( 1, 1 ), t( 2, 1 ), cq, sq, cdum ) + end if + call stdlib_wrot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, cq, sq ) + call stdlib_wrot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, cq, sq ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) )<=thresha .and.abs( t( 2, 1 ) )<=threshb + if( .not.weak )go to 20 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_wlacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_wlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_wrot( 2, work, 1, work( 3 ), 1, cz, -conjg( sz ) ) + call stdlib_wrot( 2, work( 5 ), 1, work( 7 ), 1, cz, -conjg( sz ) ) + call stdlib_wrot( 2, work, 2, work( 2 ), 2, cq, -sq ) + call stdlib_wrot( 2, work( 5 ), 2, work( 6 ), 2, cq, -sq ) + do i = 1, 2 + work( i ) = work( i ) - a( j1+i-1, j1 ) + work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 ) + work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 ) + work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 ) + end do + scale = real( czero,KIND=qp) + sum = real( cone,KIND=qp) + call stdlib_wlassq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=qp) + sum = real( cone,KIND=qp) + call stdlib_wlassq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 20 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! equivalence transformations to the original matrix pair (a,b) + call stdlib_wrot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, cz,conjg( sz ) ) + call stdlib_wrot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, cz,conjg( sz ) ) + call stdlib_wrot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq ) + call stdlib_wrot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq ) + ! set n1 by n2 (2,1) blocks to 0 + a( j1+1, j1 ) = czero + b( j1+1, j1 ) = czero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_wrot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, cz,conjg( sz ) ) + + if( wantq )call stdlib_wrot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, cq,conjg( sq ) ) + + ! exit with info = 0 if swap was successfully performed. + return + ! exit with info = 1 if swap was rejected. + 20 continue + info = 1 + return + end subroutine stdlib_wtgex2 + + !> ZTGEXC: reorders the generalized Schur decomposition of a complex + !> matrix pair (A,B), using an unitary equivalence transformation + !> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with + !> row index IFST is moved to row ILST. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_wtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ifst, lda, ldb, ldq, ldz, n + integer(ilp), intent(inout) :: ilst + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: here + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTGEXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + if( ifst==ilst )return + if( ifst=ilst )go to 20 + here = here + 1 + end if + ilst = here + return + end subroutine stdlib_wtgexc + + !> ZTGSEN: reorders the generalized Schur decomposition of a complex + !> matrix pair (A, B) (in terms of an unitary equivalence trans- + !> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the pair (A,B). The leading + !> columns of Q and Z form unitary bases of the corresponding left and + !> right eigenspaces (deflating subspaces). (A, B) must be in + !> generalized Schur canonical form, that is, A and B are both upper + !> triangular. + !> ZTGSEN also computes the generalized eigenvalues + !> w(j)= ALPHA(j) / BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, the routine computes estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_wtgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, & + ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(qp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: dif(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(qp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2 + real(qp) :: dscale, dsum, rdscal, safmin + complex(qp) :: temp1, temp2 + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,max,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 70 + end if + end if + end do + if( wantp ) then + ! solve generalized sylvester equation for r and l: + ! a11 * r - l * a22 = a12 + ! b11 * r - l * b22 = b12 + n1 = m + n2 = n - m + i = n1 + 1 + call stdlib_wlacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_wlacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + ijb = 0 + call stdlib_wtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto + ! left and right eigenspaces + rdscal = zero + dsum = one + call stdlib_wlassq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_wlassq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu estimate. + call stdlib_wtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl estimate. + call stdlib_wtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_wlacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_wlacn2( mn2, work( mn2+1 ), work, dif( 1 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_wtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_wtgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_wlacn2( mn2, work( mn2+1 ), work, dif( 2 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_wtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_wtgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + ! if b(k,k) is complex, make it real and positive (normalization + ! of the generalized schur form) and store the generalized + ! eigenvalues of reordered pair (a, b) + do k = 1, n + dscale = abs( b( k, k ) ) + if( dscale>safmin ) then + temp1 = conjg( b( k, k ) / dscale ) + temp2 = b( k, k ) / dscale + b( k, k ) = dscale + call stdlib_wscal( n-k, temp1, b( k, k+1 ), ldb ) + call stdlib_wscal( n-k+1, temp1, a( k, k ), lda ) + if( wantq )call stdlib_wscal( n, temp2, q( 1, k ), 1 ) + else + b( k, k ) = cmplx( zero, zero,KIND=qp) + end if + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + end do + 70 continue + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_wtgsen + + !> ZTGSJA: computes the generalized singular value decomposition (GSVD) + !> of two complex upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine ZGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), + !> where U, V and Q are unitary matrices. + !> R is a nonsingular upper triangular matrix, and D1 + !> and D2 are ``diagonal'' matrices, which are of the following + !> structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the unitary transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_wtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(qp), intent(in) :: tola, tolb + ! Array Arguments + real(qp), intent(out) :: alpha(*), beta(*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(qp), parameter :: hugenum = huge(zero) + + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(qp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin + complex(qp) :: a2, b2, snq, snu, snv + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + if( gamma=beta( k+i ) ) then + call stdlib_wdscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_wdscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_wcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_wcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l ZTGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B). + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + + pure subroutine stdlib_wtgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: dif(*), s(*) + complex(qp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2 + real(qp) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum + complex(qp) :: yhax, yhbx + ! Local Arrays + complex(qp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl[(a11,b11), (a22, b22)]. + n1 = 1 + n2 = n - n1 + i = n*n + 1 + call stdlib_wtgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ), dummy,1, iwork, ierr ) + end if + end if + end if + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_wtgsna + + !> ZTGSY2: solves the generalized Sylvester equation + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, + !> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, + !> N-by-N and M-by-N, respectively. A, B, D and E are upper triangular + !> (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation solving equation (1) corresponds to solve + !> Zx = scale * b, where Z is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Ik is the identity matrix of size k and X**H is the conjuguate transpose of X. + !> kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case is used to compute an estimate of Dif[(A, D), (B, E)] = + !> = sigma_min(Z) using reverse communication with ZLACON. + !> ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL + !> of an upper bound on the separation between to matrix pairs. Then + !> the input (A, D), (B, E) are sub-pencils of two matrix pairs in + !> ZTGSYL. + + pure subroutine stdlib_wtgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, rdsum, rdscal,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n + integer(ilp), intent(out) :: info + real(qp), intent(inout) :: rdscal, rdsum + real(qp), intent(out) :: scale + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(qp), intent(inout) :: c(ldc,*), f(ldf,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldz = 2 + + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, k + real(qp) :: scaloc + complex(qp) :: alpha + ! Local Arrays + integer(ilp) :: ipiv(ldz), jpiv(ldz) + complex(qp) :: rhs(ldz), z(ldz,ldz) + ! Intrinsic Functions + intrinsic :: cmplx,conjg,max + ! Executable Statements + ! decode and test input parameters + info = 0 + ierr = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>2 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda0 )info = ierr + if( ijob==0 ) then + call stdlib_wgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),c( 1, k ), 1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),f( 1, k ), 1 ) + + end do + scale = scale*scaloc + end if + else + call stdlib_wlatdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + if( i>1 ) then + alpha = -rhs( 1 ) + call stdlib_waxpy( i-1, alpha, a( 1, i ), 1, c( 1, j ), 1 ) + call stdlib_waxpy( i-1, alpha, d( 1, i ), 1, f( 1, j ), 1 ) + end if + if( j0 )info = ierr + call stdlib_wgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), c( 1, k ),1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + do k = 1, j - 1 + f( i, k ) = f( i, k ) + rhs( 1 )*conjg( b( k, j ) ) +rhs( 2 )*conjg( e( k, & + j ) ) + end do + do k = i + 1, m + c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1 ) -conjg( d( i, k ) )& + *rhs( 2 ) + end do + end do loop_70 + end do loop_80 + end if + return + end subroutine stdlib_wtgsy2 + + !> ZTGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with complex entries. A, B, D and E are upper + !> triangular (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 + !> is an output scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z + !> is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Here Ix is the identity matrix of size x and X**H is the conjugate + !> transpose of X. Kron(X, Y) is the Kronecker product between the + !> matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case (TRANS = 'C') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using ZLACON. + !> If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of + !> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. + !> This is a level-3 BLAS algorithm. + + pure subroutine stdlib_wtgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(qp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + complex(qp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(qp), intent(inout) :: c(ldc,*), f(ldf,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_zcopy by calls to stdlib_zlaset. + ! sven hammarling, 1/5/02. + + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, pq, q + real(qp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: real,cmplx,max,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_wlaset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_wlaset( 'F', m, n, czero, czero, f, ldf ) + else if( ijob>=1 .and. notran ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + ! use unblocked level 2 solver + loop_30: do iround = 1, isolve + scale = one + dscale = zero + dsum = one + pq = m*n + call stdlib_wtgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=qp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=qp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_wlacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_wlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_wlaset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_wlaset( 'F', m, n, czero, czero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_wlacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_wlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j) - subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q + pq = 0 + scale = one + dscale = zero + dsum = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + call stdlib_wtgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,linfo ) + if( linfo>0 )info = linfo + pq = pq + mb*nb + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),c( 1, k ), 1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_wscal( is-1, cmplx( scaloc, zero,KIND=qp),c( 1, k ), 1 ) + + call stdlib_wscal( is-1, cmplx( scaloc, zero,KIND=qp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_wscal( m-ie, cmplx( scaloc, zero,KIND=qp),c( ie+1, k ), & + 1 ) + call stdlib_wscal( m-ie, cmplx( scaloc, zero,KIND=qp),f( ie+1, k ), & + 1 ) + end do + do k = je + 1, n + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),c( 1, k ), 1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp),f( 1, k ), 1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( i>1 ) then + call stdlib_wgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=qp), a(& + 1, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=qp),c( 1, js ), & + ldc ) + call stdlib_wgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=qp), d(& + 1, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=qp),f( 1, js ), & + ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), c( 1, k ),1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), f( 1, k ),1 ) + + end do + do k = js, je + call stdlib_wscal( is-1, cmplx( scaloc, zero,KIND=qp),c( 1, k ), 1 ) + + call stdlib_wscal( is-1, cmplx( scaloc, zero,KIND=qp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_wscal( m-ie, cmplx( scaloc, zero,KIND=qp),c( ie+1, k ), 1 ) + + call stdlib_wscal( m-ie, cmplx( scaloc, zero,KIND=qp),f( ie+1, k ), 1 ) + + end do + do k = je + 1, n + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), c( 1, k ),1 ) + + call stdlib_wscal( m, cmplx( scaloc, zero,KIND=qp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( j>p+2 ) then + call stdlib_wgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=qp), c( is,& + js ), ldc,b( 1, js ), ldb, cmplx( one, zero,KIND=qp),f( is, 1 ), ldf ) + + call stdlib_wgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=qp), f( is,& + js ), ldf,e( 1, js ), lde, cmplx( one, zero,KIND=qp),f( is, 1 ), ldf ) + + end if + if( i

ZTPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_wtpcon( norm, uplo, diag, n, ap, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: ap(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_qlamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=qp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_wlantp( norm, uplo, diag, n, ap, rwork ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_wlatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_wlatps( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, ap, work, & + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iwamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTPLQT: computes a blocked LQ factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_wtplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_wtplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_wtprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_wtplqt + + !> ZTPLQT2: computes a LQ a factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_wtplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda ZTPMLQT: applies a complex unitary matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_wtpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + complex(qp), intent(in) :: v(ldv,*), t(ldt,*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_wtprfb( 'L', 'C', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_wtprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_wtprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_wtprfb( 'R', 'C', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_wtpmlqt + + !> ZTPMQRT: applies a complex orthogonal matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_wtpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + complex(qp), intent(in) :: v(ldv,*), t(ldt,*) + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_wtprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_wtprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_wtprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_wtprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_wtpmqrt + + !> ZTPQRT: computes a blocked QR factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_wtpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_wtpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**h to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_wtprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_wtpqrt + + !> ZTPQRT2: computes a QR factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_wtpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(qp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda ZTPRFB: applies a complex "triangular-pentagonal" block reflector H or its + !> conjugate transpose H**H to a complex matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_wtprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), b(ldb,*) + complex(qp), intent(in) :: t(ldt,*), v(ldv,*) + complex(qp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_wgemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork ) + + call stdlib_wgemm( 'C', 'N', k-l, n, m, cone, v( 1, kp ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_wgemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_wtrmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_wtrmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_wgemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork ) + + call stdlib_wgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1, kp ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_wgemm( 'N', 'C', m, l, k-l, -cone, work( 1, kp ), ldwork,v( np, kp ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_wtrmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_wgemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_wgemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1 ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_wgemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_wtrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h**h where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_wgemm( 'N', 'N', m, l, n-l, cone, b( 1, np ), ldb,v( np, kp ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_wgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1 ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_wgemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_wtrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, mp ), ldv,work, ldb ) + + call stdlib_wgemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork ) + + call stdlib_wgemm( 'N', 'N', k-l, n, m, cone, v( kp, 1 ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_wgemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_wtrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_wtrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + call stdlib_wgemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork ) + + call stdlib_wgemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1 ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_wgemm( 'N', 'N', m, l, k-l, -cone, work( 1, kp ), ldwork,v( kp, np ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_wtrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_wgemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_wgemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'C', 'N', m-l, n, k, -cone, v( 1, mp ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_wgemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_wtrmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_wgemm( 'N', 'C', m, l, n-l, cone, b( 1, np ), ldb,v( kp, np ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_wgemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_wtrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_wgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1, np ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_wgemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_wtrmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_wtprfb + + !> ZTPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by ZTPTRS or some other + !> means before entering this routine. ZTPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_wtprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, kc, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wtpsv( uplo, transt, diag, n, ap, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wtpsv( uplo, transn, diag, n, ap, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_wtprfs + + !> ZTPTRI: computes the inverse of a complex upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_wtptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + complex(qp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==czero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==czero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = cone / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -cone + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_wtpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_wscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = cone / ap( jc ) + ajj = -ap( jc ) + else + ajj = -cone + end if + if( j ZTPTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_wtptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: ap(*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb ZTPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_wtpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(qp), intent(in) :: ap(0:*) + complex(qp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = conjg( ap( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_wtpttf + + !> ZTPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_wtpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(qp), intent(out) :: a(lda,*) + complex(qp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZTRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_wtrcon( norm, uplo, diag, n, a, lda, rcond, work,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(qp), intent(out) :: rcond + ! Array Arguments + real(qp), intent(out) :: rwork(*) + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(qp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_wlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_wlatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_wlatrs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, a, lda, work,& + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_iwamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTREVC: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + + pure subroutine stdlib_wtrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + complex(qp), parameter :: cmzero = (0.0e+0_qp,0.0e+0_qp) + complex(qp), parameter :: cmone = (1.0e+0_qp,0.0e+0_qp) + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki + real(qp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt1 ) then + call stdlib_wlatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 ), scale, rwork,info ) + work( ki ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_wcopy( ki, work( 1 ), 1, vr( 1, is ), 1 ) + ii = stdlib_iwamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_wdscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = cmzero + end do + else + if( ki>1 )call stdlib_wgemv( 'N', n, ki-1, cmone, vr, ldvr, work( 1 ),1, & + cmplx( scale,KIND=qp), vr( 1, ki ), 1 ) + ii = stdlib_iwamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_wdscal( n, remax, vr( 1, ki ), 1 ) + end if + ! set back the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k+n ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! compute left eigenvectors. + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + work( n ) = cmone + ! form right-hand side. + do k = ki + 1, n + work( k ) = -conjg( t( ki, k ) ) + end do + ! solve the triangular system: + ! (t(ki+1:n,ki+1:n) - t(ki,ki))**h * x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) ZTREVC3: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_wtrevc3( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, lwork, rwork, lrwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, lrwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki, iv, maxwrk, nb + real(qp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(qp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + nb = stdlib_ilaenv( 1, 'ZTREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + rwork(1) = n + lquery = ( lwork==-1 .or. lrwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_wlaset( 'F', n, 1+2*nb, czero, czero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_qlamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_qlabad( unfl, ovfl ) + ulp = stdlib_qlamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + ! store the diagonal elements of t in working array work. + do i = 1, n + work( i ) = t( i, i ) + end do + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + rwork( 1 ) = zero + do j = 2, n + rwork( j ) = stdlib_qzasum( j-1, t( 1, j ), 1 ) + end do + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=nb=1; + ! blocked version starts with iv=nb, goes down to 1. + ! (note the "0-th" column is used to store the original diagonal.) + iv = nb + is = m + loop_80: do ki = n, 1, -1 + if( somev ) then + if( .not.select( ki ) )cycle loop_80 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex right eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper triangular system: + ! [ t(1:ki-1,1:ki-1) - t(ki,ki) ]*x = scale*work. + do k = 1, ki - 1 + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) )1 ) then + call stdlib_wlatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 + iv*n ), scale,rwork, info ) + work( ki + iv*n ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_wcopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_iwamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_wdscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = czero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_wgemv( 'N', n, ki-1, cone, vr, ldvr,work( 1 + iv*n ), 1,& + cmplx( scale,KIND=qp),vr( 1, ki ), 1 ) + ii = stdlib_iwamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_wdscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = czero + end do + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb, + ! or if this was last vector, do the gemm + if( (iv==1) .or. (ki==1) ) then + call stdlib_wgemm( 'N', 'N', n, nb-iv+1, ki+nb-iv, cone,vr, ldvr,work( 1 + & + (iv)*n ), n,czero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + ii = stdlib_iwamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / cabs1( work( ii + (nb+k)*n ) ) + call stdlib_wdscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_wlacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if + ! restore the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb. + ! (note the "0-th" column is used to store the original diagonal.) + iv = 1 + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex left eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -conjg( t( ki, k ) ) + end do + ! solve conjugate-transposed triangular system: + ! [ t(ki+1:n,ki+1:n) - t(ki,ki) ]**h * x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) ZTREXC: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that the diagonal element of T with row index IFST + !> is moved to row ILST. + !> The Schur form T is reordered by a unitary similarity transformation + !> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + !> postmultplying it with Z. + + pure subroutine stdlib_wtrexc( compq, n, t, ldt, q, ldq, ifst, ilst, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(in) :: ifst, ilst, ldq, ldt, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: q(ldq,*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: k, m1, m2, m3 + real(qp) :: cs + complex(qp) :: sn, t11, t22, temp + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 .or. ifst==ilst )return + if( ifst ZTRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by ZTRTRS or some other + !> means before entering this routine. ZTRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_wtrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(qp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(qp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(qp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(qp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=qp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_wlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_wlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_wtrsv( uplo, transt, diag, n, a, lda, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_wtrsv( uplo, transn, diag, n, a, lda, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_wtrrfs + + !> ZTRSEN: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in + !> the leading positions on the diagonal of the upper triangular matrix + !> T, and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + + subroutine stdlib_wtrsen( job, compq, select, n, t, ldt, q, ldq, w, m, s,sep, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, lwork, n + real(qp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + complex(qp), intent(inout) :: q(ldq,*), t(ldt,*) + complex(qp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, ks, lwmin, n1, n2, nn + real(qp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + real(qp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode and test the input parameters. + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + ! set m to the number of selected eigenvalues. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + n1 = m + n2 = n - m + nn = n1*n2 + info = 0 + lquery = ( lwork==-1 ) + if( wantsp ) then + lwmin = max( 1, 2*nn ) + else if( stdlib_lsame( job, 'N' ) ) then + lwmin = 1 + else if( stdlib_lsame( job, 'E' ) ) then + lwmin = max( 1, nn ) + end if + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt ZTRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a complex upper triangular + !> matrix T (or of any matrix Q*T*Q**H with Q unitary). + + pure subroutine stdlib_wtrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm,& + m, work, ldwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(qp), intent(out) :: rwork(*), s(*), sep(*) + complex(qp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(qp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: somcon, wantbh, wants, wantsp + character :: normin + integer(ilp) :: i, ierr, ix, j, k, kase, ks + real(qp) :: bignum, eps, est, lnrm, rnrm, scale, smlnum, xnorm + complex(qp) :: cdum, prod + ! Local Arrays + integer(ilp) :: isave(3) + complex(qp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(qp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=qp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + ! set m to the number of eigenpairs for which condition numbers are + ! to be computed. + if( somcon ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt ZTRSYL: solves the complex Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**H, and A and B are both upper triangular. A is + !> M-by-M and B is N-by-N; the right hand side C and the solution X are + !> M-by-N; and scale is an output scale factor, set <= 1 to avoid + !> overflow in X. + + subroutine stdlib_wtrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(qp), intent(out) :: scale + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), b(ldb,*) + complex(qp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: j, k, l + real(qp) :: bignum, da11, db, eps, scaloc, sgn, smin, smlnum + complex(qp) :: a11, suml, sumr, vec, x11 + ! Local Arrays + real(qp) :: dum(1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_wladiv( vec*cmplx( scaloc,KIND=qp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_wdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_30 + else if( .not.notrna .and. notrnb ) then + ! solve a**h *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 l-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + loop_60: do l = 1, n + do k = 1, m + suml = stdlib_wdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_wdotu( l-1, c( k, 1 ), ldc, b( 1, l ), 1 ) + vec = c( k, l ) - ( suml+sgn*sumr ) + scaloc = one + a11 = conjg( a( k, k ) ) + sgn*b( l, l ) + da11 = abs( real( a11,KIND=qp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=qp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_wladiv( vec*cmplx( scaloc,KIND=qp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_wdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_60 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**h*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! upper-right corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + + ! i=1 + ! n + ! isgn*sum [x(k,j)*b**h(l,j)]. + ! j=l+1 + loop_90: do l = n, 1, -1 + do k = 1, m + suml = stdlib_wdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_wdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = conjg( a( k, k )+sgn*b( l, l ) ) + da11 = abs( real( a11,KIND=qp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=qp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_wladiv( vec*cmplx( scaloc,KIND=qp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_wdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_90 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! bottom-left corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b**h(l,j)] + ! i=k+1 j=l+1 + loop_120: do l = n, 1, -1 + do k = m, 1, -1 + suml = stdlib_wdotu( m-k, a( k, min( k+1, m ) ), lda,c( min( k+1, m ), l ), 1 & + ) + sumr = stdlib_wdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = a( k, k ) + sgn*conjg( b( l, l ) ) + da11 = abs( real( a11,KIND=qp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=qp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_wladiv( vec*cmplx( scaloc,KIND=qp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_wdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_120 + end if + return + end subroutine stdlib_wtrsyl + + !> ZTRTI2: computes the inverse of a complex upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_wtrti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + complex(qp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZTRTRI: computes the inverse of a complex upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_wtrtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_wtrti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_wtrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, cone, a, & + lda, a( 1, j ), lda ) + call stdlib_wtrsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -cone, a( & + j, j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_wtrti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_wtrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, & + cone, a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_wtrsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + cone, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_wtrti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_wtrtri + + !> ZTRTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_wtrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda ZTRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_wtrttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(qp), intent(in) :: a(0:lda-1,0:*) + complex(qp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + arf( ij ) = conjg( a( n2+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + arf( ij ) = conjg( a( j-n1, l ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + arf( ij ) = a( i, n1+j ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda=n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + arf( ij ) = conjg( a( n2+j, l ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + arf( ij ) = conjg( a( k+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - k, k - 1 + arf( ij ) = conjg( a( j-k, l ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + arf( ij ) = a( i, k+1+j ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + arf( ij ) = conjg( a( k+1+j, l ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_wtrttf + + !> ZTRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_wtrttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(qp), intent(in) :: a(lda,*) + complex(qp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZTZRZF: reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A + !> to upper triangular form by means of unitary transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N unitary matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_wtzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_wlarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_wlatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_wtzrzf + + !> ZUNBDB: simultaneously bidiagonalizes the blocks of an M-by-M + !> partitioned unitary matrix X: + !> [ B11 | B12 0 0 ] + !> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H + !> X = [-----------] = [---------] [----------------] [---------] . + !> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] + !> [ 0 | 0 0 I ] + !> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is + !> not the case, then X must be transposed and/or permuted. This can be + !> done in constant time using the TRANS and SIGNS options. See ZUNCSD + !> for details.) + !> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- + !> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are + !> represented implicitly by Householder vectors. + !> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_wunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q + ! Array Arguments + real(qp), intent(out) :: phi(*), theta(*) + complex(qp), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! ==================================================================== + ! Parameters + real(qp), parameter :: realone = 1.0_qp + + + ! Local Scalars + logical(lk) :: colmajor, lquery + integer(ilp) :: i, lworkmin, lworkopt + real(qp) :: z1, z2, z3, z4 + ! Intrinsic Functions + intrinsic :: atan2,cos,max,min,sin + intrinsic :: cmplx,conjg + ! Executable Statements + ! test input arguments + info = 0 + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( .not. stdlib_lsame( signs, 'O' ) ) then + z1 = realone + z2 = realone + z3 = realone + z4 = realone + else + z1 = realone + z2 = -realone + z3 = realone + z4 = -realone + end if + lquery = lwork == -1 + if( m < 0 ) then + info = -3 + else if( p < 0 .or. p > m ) then + info = -4 + else if( q < 0 .or. q > p .or. q > m-p .or.q > m-q ) then + info = -5 + else if( colmajor .and. ldx11 < max( 1, p ) ) then + info = -7 + else if( .not.colmajor .and. ldx11 < max( 1, q ) ) then + info = -7 + else if( colmajor .and. ldx12 < max( 1, p ) ) then + info = -9 + else if( .not.colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -9 + else if( colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -11 + else if( .not.colmajor .and. ldx21 < max( 1, q ) ) then + info = -11 + else if( colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -13 + else if( .not.colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -13 + end if + ! compute workspace + if( info == 0 ) then + lworkopt = m - q + lworkmin = m - q + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'XORBDB', -info ) + return + else if( lquery ) then + return + end if + ! handle column-major and row-major separately + if( colmajor ) then + ! reduce columns 1, ..., q of x11, x12, x21, and x22 + do i = 1, q + if( i == 1 ) then + call stdlib_wscal( p-i+1, cmplx( z1, 0.0_qp,KIND=qp), x11(i,i), 1 ) + else + call stdlib_wscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_qp,KIND=qp),x11(i,i), & + 1 ) + call stdlib_waxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_qp,KIND=qp), x12(& + i,i-1), 1, x11(i,i), 1 ) + end if + if( i == 1 ) then + call stdlib_wscal( m-p-i+1, cmplx( z2, 0.0_qp,KIND=qp), x21(i,i), 1 ) + else + call stdlib_wscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_qp,KIND=qp),x21(i,i),& + 1 ) + call stdlib_waxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_qp,KIND=qp), & + x22(i,i-1), 1, x21(i,i), 1 ) + end if + theta(i) = atan2( stdlib_qznrm2( m-p-i+1, x21(i,i), 1 ),stdlib_qznrm2( p-i+1, & + x11(i,i), 1 ) ) + if( p > i ) then + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + else if ( p == i ) then + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i,i), 1, taup1(i) ) + end if + x11(i,i) = cone + if ( m-p > i ) then + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1,taup2(i) ) + else if ( m-p == i ) then + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i,i), 1,taup2(i) ) + end if + x21(i,i) = cone + if ( q > i ) then + call stdlib_wlarf( 'L', p-i+1, q-i, x11(i,i), 1,conjg(taup1(i)), x11(i,i+1), & + ldx11, work ) + call stdlib_wlarf( 'L', m-p-i+1, q-i, x21(i,i), 1,conjg(taup2(i)), x21(i,i+1),& + ldx21, work ) + end if + if ( m-q+1 > i ) then + call stdlib_wlarf( 'L', p-i+1, m-q-i+1, x11(i,i), 1,conjg(taup1(i)), x12(i,i),& + ldx12, work ) + call stdlib_wlarf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1,conjg(taup2(i)), x22(i,& + i), ldx22, work ) + end if + if( i < q ) then + call stdlib_wscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_qp,KIND=qp),x11(i,i+& + 1), ldx11 ) + call stdlib_waxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_qp,KIND=qp),x21(i,i+1)& + , ldx21, x11(i,i+1), ldx11 ) + end if + call stdlib_wscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_qp,KIND=qp),x12(i,i)& + , ldx12 ) + call stdlib_waxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_qp,KIND=qp),x22(i,i),& + ldx22, x12(i,i), ldx12 ) + if( i < q )phi(i) = atan2( stdlib_qznrm2( q-i, x11(i,i+1), ldx11 ),stdlib_qznrm2(& + m-q-i+1, x12(i,i), ldx12 ) ) + if( i < q ) then + call stdlib_wlacgv( q-i, x11(i,i+1), ldx11 ) + if ( i == q-1 ) then + call stdlib_wlarfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) + else + call stdlib_wlarfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) + end if + x11(i,i+1) = cone + end if + if ( m-q+1 > i ) then + call stdlib_wlacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( m-q == i ) then + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + + end if + end if + x12(i,i) = cone + if( i < q ) then + call stdlib_wlarf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & + ldx11, work ) + call stdlib_wlarf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & + ldx21, work ) + end if + if ( p > i ) then + call stdlib_wlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if ( m-p > i ) then + call stdlib_wlarf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & + ldx22, work ) + end if + if( i < q )call stdlib_wlacgv( q-i, x11(i,i+1), ldx11 ) + call stdlib_wlacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_wscal( m-q-i+1, cmplx( -z1*z4, 0.0_qp,KIND=qp), x12(i,i),ldx12 ) + + call stdlib_wlacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( i >= m-q ) then + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + end if + x12(i,i) = cone + if ( p > i ) then + call stdlib_wlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_wlarf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& + x22(q+1,i), ldx22, work ) + call stdlib_wlacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_wscal( m-p-q-i+1, cmplx( z2*z4, 0.0_qp,KIND=qp),x22(q+i,p+i), ldx22 ) + + call stdlib_wlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + call stdlib_wlarfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) + + x22(q+i,p+i) = cone + call stdlib_wlarf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& + q+i+1,p+i), ldx22, work ) + call stdlib_wlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + end do + else + ! reduce columns 1, ..., q of x11, x12, x21, x22 + do i = 1, q + if( i == 1 ) then + call stdlib_wscal( p-i+1, cmplx( z1, 0.0_qp,KIND=qp), x11(i,i),ldx11 ) + else + call stdlib_wscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_qp,KIND=qp),x11(i,i), & + ldx11 ) + call stdlib_waxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_qp,KIND=qp), x12(& + i-1,i), ldx12, x11(i,i), ldx11 ) + end if + if( i == 1 ) then + call stdlib_wscal( m-p-i+1, cmplx( z2, 0.0_qp,KIND=qp), x21(i,i),ldx21 ) + + else + call stdlib_wscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_qp,KIND=qp),x21(i,i),& + ldx21 ) + call stdlib_waxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_qp,KIND=qp), & + x22(i-1,i), ldx22, x21(i,i), ldx21 ) + end if + theta(i) = atan2( stdlib_qznrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib_qznrm2( p-i+1,& + x11(i,i), ldx11 ) ) + call stdlib_wlacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_wlacgv( m-p-i+1, x21(i,i), ldx21 ) + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) + x11(i,i) = cone + if ( i == m-p ) then + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) + else + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) + end if + x21(i,i) = cone + call stdlib_wlarf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & + work ) + call stdlib_wlarf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & + ldx12, work ) + call stdlib_wlarf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & + ldx21, work ) + call stdlib_wlarf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & + ldx22, work ) + call stdlib_wlacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_wlacgv( m-p-i+1, x21(i,i), ldx21 ) + if( i < q ) then + call stdlib_wscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_qp,KIND=qp),x11(i+1,& + i), 1 ) + call stdlib_waxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_qp,KIND=qp),x21(i+1,i)& + , 1, x11(i+1,i), 1 ) + end if + call stdlib_wscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_qp,KIND=qp),x12(i,i)& + , 1 ) + call stdlib_waxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_qp,KIND=qp),x22(i,i),& + 1, x12(i,i), 1 ) + if( i < q )phi(i) = atan2( stdlib_qznrm2( q-i, x11(i+1,i), 1 ),stdlib_qznrm2( m-& + q-i+1, x12(i,i), 1 ) ) + if( i < q ) then + call stdlib_wlarfgp( q-i, x11(i+1,i), x11(i+2,i), 1, tauq1(i) ) + x11(i+1,i) = cone + end if + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if( i < q ) then + call stdlib_wlarf( 'L', q-i, p-i, x11(i+1,i), 1,conjg(tauq1(i)), x11(i+1,i+1),& + ldx11, work ) + call stdlib_wlarf( 'L', q-i, m-p-i, x11(i+1,i), 1,conjg(tauq1(i)), x21(i+1,i+& + 1), ldx21, work ) + end if + call stdlib_wlarf( 'L', m-q-i+1, p-i, x12(i,i), 1,conjg(tauq2(i)), x12(i,i+1), & + ldx12, work ) + if ( m-p > i ) then + call stdlib_wlarf( 'L', m-q-i+1, m-p-i, x12(i,i), 1,conjg(tauq2(i)), x22(i,i+& + 1), ldx22, work ) + end if + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_wscal( m-q-i+1, cmplx( -z1*z4, 0.0_qp,KIND=qp), x12(i,i), 1 ) + call stdlib_wlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if ( p > i ) then + call stdlib_wlarf( 'L', m-q-i+1, p-i, x12(i,i), 1,conjg(tauq2(i)), x12(i,i+1),& + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_wlarf( 'L', m-q-i+1, m-p-q, x12(i,i), 1,conjg(tauq2(& + i)), x22(i,q+1), ldx22, work ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_wscal( m-p-q-i+1, cmplx( z2*z4, 0.0_qp,KIND=qp),x22(p+i,q+i), 1 ) + + call stdlib_wlarfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1,tauq2(p+i) ) + + x22(p+i,q+i) = cone + if ( m-p-q /= i ) then + call stdlib_wlarf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1,conjg(tauq2(p+i)),& + x22(p+i,q+i+1), ldx22,work ) + end if + end do + end if + return + end subroutine stdlib_wunbdb + + !> ZUNBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines ZUNBDB2, ZUNBDB3, and ZUNBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_wunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(qp), intent(out) :: phi(*), theta(*) + complex(qp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(qp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < q .or. m-p < q ) then + info = -2 + else if( q < 0 .or. m-q < q ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-2 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB1', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., q of x11 and x21 + do i = 1, q + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + theta(i) = atan2( real( x21(i,i),KIND=qp), real( x11(i,i),KIND=qp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i) = cone + x21(i,i) = cone + call stdlib_wlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + call stdlib_wlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + if( i < q ) then + call stdlib_wdrot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) + call stdlib_wlacgv( q-i, x21(i,i+1), ldx21 ) + call stdlib_wlarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) + s = real( x21(i,i+1),KIND=qp) + x21(i,i+1) = cone + call stdlib_wlarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & + ldx11, work(ilarf) ) + call stdlib_wlarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & + ldx21, work(ilarf) ) + call stdlib_wlacgv( q-i, x21(i,i+1), ldx21 ) + c = sqrt( stdlib_qznrm2( p-i, x11(i+1,i+1), 1 )**2+ stdlib_qznrm2( m-p-i, x21(i+& + 1,i+1), 1 )**2 ) + phi(i) = atan2( s, c ) + call stdlib_wunbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1,x21(i+1,i+1), 1, x11(i+1,& + i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) + end if + end do + return + end subroutine stdlib_wunbdb1 + + !> ZUNBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines ZUNBDB1, ZUNBDB3, and ZUNBDB4 handle cases in + !> which P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_wunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(qp), intent(out) :: phi(*), theta(*) + complex(qp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(qp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < 0 .or. p > m-p ) then + info = -2 + else if( q < 0 .or. q < p .or. m-q < p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB2', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., p of x11 and x21 + do i = 1, p + if( i > 1 ) then + call stdlib_wdrot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) + end if + call stdlib_wlacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_wlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + c = real( x11(i,i),KIND=qp) + x11(i,i) = cone + call stdlib_wlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_wlarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & + work(ilarf) ) + call stdlib_wlacgv( q-i+1, x11(i,i), ldx11 ) + s = sqrt( stdlib_qznrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_qznrm2( m-p-i+1, x21(i,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_wunbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1, x21(i,i), 1,x11(i+1,i+1), & + ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_wscal( p-i, cnegone, x11(i+1,i), 1 ) + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + if( i < p ) then + call stdlib_wlarfgp( p-i, x11(i+1,i), x11(i+2,i), 1, taup1(i) ) + phi(i) = atan2( real( x11(i+1,i),KIND=qp), real( x21(i,i),KIND=qp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x11(i+1,i) = cone + call stdlib_wlarf( 'L', p-i, q-i, x11(i+1,i), 1, conjg(taup1(i)),x11(i+1,i+1), & + ldx11, work(ilarf) ) + end if + x21(i,i) = cone + call stdlib_wlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + ! reduce the bottom-right portion of x21 to the identity matrix + do i = p + 1, q + call stdlib_wlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + x21(i,i) = cone + call stdlib_wlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + return + end subroutine stdlib_wunbdb2 + + !> ZUNBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_wunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(qp), intent(out) :: phi(*), theta(*) + complex(qp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(qp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( 2*p < m .or. p > m ) then + info = -2 + else if( q < m-p .or. m-q < m-p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB3', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., m-p of x11 and x21 + do i = 1, m-p + if( i > 1 ) then + call stdlib_wdrot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) + end if + call stdlib_wlacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_wlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + s = real( x21(i,i),KIND=qp) + x21(i,i) = cone + call stdlib_wlarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_wlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_wlacgv( q-i+1, x21(i,i), ldx21 ) + c = sqrt( stdlib_qznrm2( p-i+1, x11(i,i), 1 )**2+ stdlib_qznrm2( m-p-i, x21(i+1,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_wunbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1, x21(i+1,i), 1,x11(i,i+1), & + ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + if( i < m-p ) then + call stdlib_wlarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1, taup2(i) ) + phi(i) = atan2( real( x21(i+1,i),KIND=qp), real( x11(i,i),KIND=qp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x21(i+1,i) = cone + call stdlib_wlarf( 'L', m-p-i, q-i, x21(i+1,i), 1,conjg(taup2(i)), x21(i+1,i+1), & + ldx21,work(ilarf) ) + end if + x11(i,i) = cone + call stdlib_wlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + ! reduce the bottom-right portion of x11 to the identity matrix + do i = m-p + 1, q + call stdlib_wlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + x11(i,i) = cone + call stdlib_wlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + return + end subroutine stdlib_wunbdb3 + + !> ZUNBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_wunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + phantom, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(qp), intent(out) :: phi(*), theta(*) + complex(qp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(qp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < m-q .or. m-p < m-q ) then + info = -2 + else if( q < m-q .or. q > m ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( q-1, p-1, m-p-1 ) + iorbdb5 = 2 + lorbdb5 = q + lworkopt = ilarf + llarf - 1 + lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB4', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., m-q of x11 and x21 + do i = 1, m-q + if( i == 1 ) then + do j = 1, m + phantom(j) = czero + end do + call stdlib_wunbdb5( p, m-p, q, phantom(1), 1, phantom(p+1), 1,x11, ldx11, x21, & + ldx21, work(iorbdb5),lorbdb5, childinfo ) + call stdlib_wscal( p, cnegone, phantom(1), 1 ) + call stdlib_wlarfgp( p, phantom(1), phantom(2), 1, taup1(1) ) + call stdlib_wlarfgp( m-p, phantom(p+1), phantom(p+2), 1, taup2(1) ) + theta(i) = atan2( real( phantom(1),KIND=qp), real( phantom(p+1),KIND=qp) ) + + c = cos( theta(i) ) + s = sin( theta(i) ) + phantom(1) = cone + phantom(p+1) = cone + call stdlib_wlarf( 'L', p, q, phantom(1), 1, conjg(taup1(1)), x11,ldx11, work(& + ilarf) ) + call stdlib_wlarf( 'L', m-p, q, phantom(p+1), 1, conjg(taup2(1)),x21, ldx21, & + work(ilarf) ) + else + call stdlib_wunbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1,x21(i,i-1), 1, x11(i,i)& + , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) + call stdlib_wscal( p-i+1, cnegone, x11(i,i-1), 1 ) + call stdlib_wlarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1, taup1(i) ) + call stdlib_wlarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1,taup2(i) ) + theta(i) = atan2( real( x11(i,i-1),KIND=qp), real( x21(i,i-1),KIND=qp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i-1) = cone + x21(i,i-1) = cone + call stdlib_wlarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1,conjg(taup1(i)), x11(i,i), & + ldx11, work(ilarf) ) + call stdlib_wlarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1,conjg(taup2(i)), x21(i,i), & + ldx21, work(ilarf) ) + end if + call stdlib_wdrot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) + call stdlib_wlacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_wlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + c = real( x21(i,i),KIND=qp) + x21(i,i) = cone + call stdlib_wlarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_wlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_wlacgv( q-i+1, x21(i,i), ldx21 ) + if( i < m-q ) then + s = sqrt( stdlib_qznrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_qznrm2( m-p-i, x21(i+1,& + i), 1 )**2 ) + phi(i) = atan2( s, c ) + end if + end do + ! reduce the bottom-right portion of x11 to [ i 0 ] + do i = m - q + 1, p + call stdlib_wlacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_wlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + x11(i,i) = cone + call stdlib_wlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_wlarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & + work(ilarf) ) + call stdlib_wlacgv( q-i+1, x11(i,i), ldx11 ) + end do + ! reduce the bottom-right portion of x21 to [ 0 i ] + do i = p + 1, q + call stdlib_wlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + call stdlib_wlarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) + + x21(m-q+i-p,i) = cone + call stdlib_wlarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& + , ldx21, work(ilarf) ) + call stdlib_wlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + end do + return + end subroutine stdlib_wunbdb4 + + !> ZUNBDB5: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then some other vector from the orthogonal complement + !> is returned. This vector is chosen in an arbitrary but deterministic + !> way. + + pure subroutine stdlib_wunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, j + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB5', -info ) + return + end if + ! project x onto the orthogonal complement of q + call stdlib_wunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & + childinfo ) + ! if the projection is nonzero, then return + if( stdlib_qznrm2(m1,x1,incx1) /= czero.or. stdlib_qznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + ! project each standard basis vector e_1,...,e_m1 in turn, stopping + ! when a nonzero projection is found + do i = 1, m1 + do j = 1, m1 + x1(j) = czero + end do + x1(i) = cone + do j = 1, m2 + x2(j) = czero + end do + call stdlib_wunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_qznrm2(m1,x1,incx1) /= czero.or. stdlib_qznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, + ! stopping when a nonzero projection is found + do i = 1, m2 + do j = 1, m1 + x1(j) = czero + end do + do j = 1, m2 + x2(j) = czero + end do + x2(i) = cone + call stdlib_wunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_qznrm2(m1,x1,incx1) /= czero.or. stdlib_qznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + return + end subroutine stdlib_wunbdb5 + + !> ZUNBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + + pure subroutine stdlib_wunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(qp), intent(out) :: work(*) + complex(qp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + ! Parameters + real(qp), parameter :: alphasq = 0.01_qp + real(qp), parameter :: realone = 1.0_qp + real(qp), parameter :: realzero = 0.0_qp + + + ! Local Scalars + integer(ilp) :: i + real(qp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB6', -info ) + return + end if + ! first, project x onto the orthogonal complement of q's column + ! space + scl1 = realzero + ssq1 = realone + call stdlib_wlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_wlassq( m2, x2, incx2, scl2, ssq2 ) + normsq1 = scl1**2*ssq1 + scl2**2*ssq2 + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_wgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_wgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_wgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_wgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_wlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_wlassq( m2, x2, incx2, scl2, ssq2 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if projection is sufficiently large in norm, then stop. + ! if projection is czero, then stop. + ! otherwise, project again. + if( normsq2 >= alphasq*normsq1 ) then + return + end if + if( normsq2 == czero ) then + return + end if + normsq1 = normsq2 + do i = 1, n + work(i) = czero + end do + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_wgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_wgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_wgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_wgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_wlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_wlassq( m1, x1, incx1, scl1, ssq1 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if second projection is sufficiently large in norm, then do + ! nothing more. alternatively, if it shrunk significantly, then + ! truncate it to czero. + if( normsq2 < alphasq*normsq1 ) then + do i = 1, m1 + x1(i) = czero + end do + do i = 1, m2 + x2(i) = czero + end do + end if + return + end subroutine stdlib_wunbdb6 + + !> ZUNCSD: computes the CS decomposition of an M-by-M partitioned + !> unitary matrix X: + !> [ I 0 0 | 0 0 0 ] + !> [ 0 C 0 | 0 -S 0 ] + !> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H + !> X = [-----------] = [---------] [---------------------] [---------] . + !> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] + !> [ 0 S 0 | 0 C 0 ] + !> [ 0 0 I | 0 0 0 ] + !> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, + !> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are + !> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in + !> which R = MIN(P,M-P,Q,M-Q). + + recursive subroutine stdlib_wuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & + ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & + work, lwork, rwork, lrwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & + lrwork, lwork, m, p, q + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(qp), intent(out) :: theta(*) + real(qp), intent(out) :: rwork(*) + complex(qp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) + + complex(qp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! =================================================================== + + ! Local Scalars + character :: transt, signst + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & + lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & + lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & + lworkopt, p1, q1 + logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t + integer(ilp) :: lrworkmin, lrworkopt + logical(lk) :: lrquery + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + defaultsigns = .not. stdlib_lsame( signs, 'O' ) + lquery = lwork == -1 + lrquery = lrwork == -1 + if( m < 0 ) then + info = -7 + else if( p < 0 .or. p > m ) then + info = -8 + else if( q < 0 .or. q > m ) then + info = -9 + else if ( colmajor .and. ldx11 < max( 1, p ) ) then + info = -11 + else if (.not. colmajor .and. ldx11 < max( 1, q ) ) then + info = -11 + else if (colmajor .and. ldx12 < max( 1, p ) ) then + info = -13 + else if (.not. colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -13 + else if (colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -15 + else if (.not. colmajor .and. ldx21 < max( 1, q ) ) then + info = -15 + else if (colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -17 + else if (.not. colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -17 + else if( wantu1 .and. ldu1 < p ) then + info = -20 + else if( wantu2 .and. ldu2 < m-p ) then + info = -22 + else if( wantv1t .and. ldv1t < q ) then + info = -24 + else if( wantv2t .and. ldv2t < m-q ) then + info = -26 + end if + ! work with transpose if convenient + if( info == 0 .and. min( p, m-p ) < min( q, m-q ) ) then + if( colmajor ) then + transt = 'T' + else + transt = 'N' + end if + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_wuncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & + ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& + u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) + return + end if + ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if + ! convenient + if( info == 0 .and. m-q < q ) then + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_wuncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & + ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & + v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) + return + end if + ! compute workspace + if( info == 0 ) then + ! real workspace + iphi = 2 + ib11d = iphi + max( 1, q - 1 ) + ib11e = ib11d + max( 1, q ) + ib12d = ib11e + max( 1, q - 1 ) + ib12e = ib12d + max( 1, q ) + ib21d = ib12e + max( 1, q - 1 ) + ib21e = ib21d + max( 1, q ) + ib22d = ib21e + max( 1, q - 1 ) + ib22e = ib22d + max( 1, q ) + ibbcsd = ib22e + max( 1, q - 1 ) + call stdlib_wbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & + theta, theta, rwork, -1, childinfo ) + lbbcsdworkopt = int( rwork(1),KIND=ilp) + lbbcsdworkmin = lbbcsdworkopt + lrworkopt = ibbcsd + lbbcsdworkopt - 1 + lrworkmin = ibbcsd + lbbcsdworkmin - 1 + rwork(1) = lrworkopt + ! complex workspace + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m - p ) + itauq2 = itauq1 + max( 1, q ) + iorgqr = itauq2 + max( 1, m - q ) + call stdlib_wungqr( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorgqrworkopt = int( work(1),KIND=ilp) + lorgqrworkmin = max( 1, m - q ) + iorglq = itauq2 + max( 1, m - q ) + call stdlib_wunglq( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorglqworkopt = int( work(1),KIND=ilp) + lorglqworkmin = max( 1, m - q ) + iorbdb = itauq2 + max( 1, m - q ) + call stdlib_wunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, theta, u1, u2,v1t, v2t, work, -1, childinfo ) + lorbdbworkopt = int( work(1),KIND=ilp) + lorbdbworkmin = lorbdbworkopt + lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & + lorbdbworkopt ) - 1 + lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & + lorbdbworkmin ) - 1 + work(1) = max(lworkopt,lworkmin) + if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -22 + else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -24 + else + lorgqrwork = lwork - iorgqr + 1 + lorglqwork = lwork - iorglq + 1 + lorbdbwork = lwork - iorbdb + 1 + lbbcsdwork = lrwork - ibbcsd + 1 + end if + end if + ! abort if any illegal arguments + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNCSD', -info ) + return + else if( lquery .or. lrquery ) then + return + end if + ! transform to bidiagonal block form + call stdlib_wunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & + ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& + iorbdb), lorbdbwork, childinfo ) + ! accumulate householder reflectors + if( colmajor ) then + if( wantu1 .and. p > 0 ) then + call stdlib_wlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_wungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_wungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wlacpy( 'U', q-1, q-1, x11(1,2), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_wunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglqwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + call stdlib_wlacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) + if( m-p > q) then + call stdlib_wlacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & + ldv2t ) + end if + if( m > q ) then + call stdlib_wunglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & + lorglqwork, info ) + end if + end if + else + if( wantu1 .and. p > 0 ) then + call stdlib_wlacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) + call stdlib_wunglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wlacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) + call stdlib_wunglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wlacpy( 'L', q-1, q-1, x11(2,1), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_wungqr( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorgqr), & + lorgqrwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + p1 = min( p+1, m ) + q1 = min( q+1, m ) + call stdlib_wlacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) + if( m > p+q ) then + call stdlib_wlacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) + + end if + call stdlib_wungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & + lorgqrwork, info ) + end if + end if + ! compute the csd of the matrix in bidiagonal-block form + call stdlib_wbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & + u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& + rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& + lbbcsdwork, info ) + ! permute rows and columns to place identity submatrices in top- + ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- + ! block and/or bottom-right corner of (2,1)-block and/or top-left + ! corner of (2,2)-block + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + if( colmajor ) then + call stdlib_wlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + else + call stdlib_wlapmr( .false., m-p, m-p, u2, ldu2, iwork ) + end if + end if + if( m > 0 .and. wantv2t ) then + do i = 1, p + iwork(i) = m - p - q + i + end do + do i = p + 1, m - q + iwork(i) = i - p + end do + if( .not. colmajor ) then + call stdlib_wlapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) + else + call stdlib_wlapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) + end if + end if + return + ! end stdlib_wuncsd + end subroutine stdlib_wuncsd + + !> ZUNCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + + subroutine stdlib_wuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q + integer(ilp), intent(in) :: lrwork + integer(ilp) :: lrworkmin, lrworkopt + ! Array Arguments + real(qp), intent(out) :: rwork(*) + real(qp), intent(out) :: theta(*) + complex(qp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) + complex(qp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & + lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & + r + logical(lk) :: lquery, wantu1, wantu2, wantv1t + ! Local Arrays + real(qp) :: dum(1) + complex(qp) :: cdum(1,1) + ! Intrinsic Function + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + lquery = ( lwork==-1 ) .or. ( lrwork==-1 ) + if( m < 0 ) then + info = -4 + else if( p < 0 .or. p > m ) then + info = -5 + else if( q < 0 .or. q > m ) then + info = -6 + else if( ldx11 < max( 1, p ) ) then + info = -8 + else if( ldx21 < max( 1, m-p ) ) then + info = -10 + else if( wantu1 .and. ldu1 < max( 1, p ) ) then + info = -13 + else if( wantu2 .and. ldu2 < max( 1, m - p ) ) then + info = -15 + else if( wantv1t .and. ldv1t < max( 1, q ) ) then + info = -17 + end if + r = min( p, m-p, q, m-q ) + ! compute workspace + ! work layout: + ! |-----------------------------------------| + ! | lworkopt (1) | + ! |-----------------------------------------| + ! | taup1 (max(1,p)) | + ! | taup2 (max(1,m-p)) | + ! | tauq1 (max(1,q)) | + ! |-----------------------------------------| + ! | stdlib_wunbdb work | stdlib_wungqr work | stdlib_wunglq work | + ! | | | | + ! | | | | + ! | | | | + ! | | | | + ! |-----------------------------------------| + ! rwork layout: + ! |------------------| + ! | lrworkopt (1) | + ! |------------------| + ! | phi (max(1,r-1)) | + ! |------------------| + ! | b11d (r) | + ! | b11e (r-1) | + ! | b12d (r) | + ! | b12e (r-1) | + ! | b21d (r) | + ! | b21e (r-1) | + ! | b22d (r) | + ! | b22e (r-1) | + ! | stdlib_wbbcsd rwork | + ! |------------------| + if( info == 0 ) then + iphi = 2 + ib11d = iphi + max( 1, r-1 ) + ib11e = ib11d + max( 1, r ) + ib12d = ib11e + max( 1, r - 1 ) + ib12e = ib12d + max( 1, r ) + ib21d = ib12e + max( 1, r - 1 ) + ib21e = ib21d + max( 1, r ) + ib22d = ib21e + max( 1, r - 1 ) + ib22e = ib22d + max( 1, r ) + ibbcsd = ib22e + max( 1, r - 1 ) + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m-p ) + iorbdb = itauq1 + max( 1, q ) + iorgqr = itauq1 + max( 1, q ) + iorglq = itauq1 + max( 1, q ) + lorgqrmin = 1 + lorgqropt = 1 + lorglqmin = 1 + lorglqopt = 1 + if( r == q ) then + call stdlib_wunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work, -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_wungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + endif + if( wantu2 .and. m-p > 0 ) then + call stdlib_wungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wunglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1), -1, childinfo ) + + lorglqmin = max( lorglqmin, q-1 ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_wbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum, u1, ldu1,& + u2, ldu2, v1t, ldv1t, cdum, 1,dum, dum, dum, dum, dum, dum, dum, dum,rwork(1), -& + 1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == p ) then + call stdlib_wunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_wungqr( p-1, p-1, p-1, u1(2,2), ldu1, cdum, work(1),-1, childinfo & + ) + lorgqrmin = max( lorgqrmin, p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_wbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & + ldv1t, cdum, 1, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == m-p ) then + call stdlib_wunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_wungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2, cdum,work(1), -1, & + childinfo ) + lorgqrmin = max( lorgqrmin, m-p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_wbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& + 1, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else + call stdlib_wunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, cdum, work(1), -1, childinfo) + lorbdb = m + int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_wungqr( p, p, m-q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wunglq( q, q, q, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_wbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & + ldu2, u1, ldu1, cdum, 1, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + end if + lrworkmin = ibbcsd+lbbcsd-1 + lrworkopt = lrworkmin + rwork(1) = lrworkopt + lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) + lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -19 + end if + if( lrwork < lrworkmin .and. .not.lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNCSD2BY1', -info ) + return + else if( lquery ) then + return + end if + lorgqr = lwork-iorgqr+1 + lorglq = lwork-iorglq+1 + ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, + ! in which r = min(p,m-p,q,m-q) + if( r == q ) then + ! case 1: r = q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_wunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_wlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_wungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_wungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + v1t(1,1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_wlacpy( 'U', q-1, q-1, x21(1,2), ldx21, v1t(2,2),ldv1t ) + call stdlib_wunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglq, childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_wbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & + ldu1, u2, ldu2, v1t, ldv1t, cdum,1, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& + ibbcsd+1, childinfo ) + ! permute rows and columns to place czero submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_wlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == p ) then + ! case 2: r = p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_wunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + u1(1,1) = cone + do j = 2, p + u1(1,j) = czero + u1(j,1) = czero + end do + call stdlib_wlacpy( 'L', p-1, p-1, x11(2,1), ldx11, u1(2,2), ldu1 ) + call stdlib_wungqr( p-1, p-1, p-1, u1(2,2), ldu1, work(itaup1),work(iorgqr), & + lorgqr, childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_wlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_wungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wlacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) + call stdlib_wunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_wbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& + ldv1t, cdum, 1, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & + lbbcsd,childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_wlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == m-p ) then + ! case 3: r = m-p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_wunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_wlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_wungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + u2(1,1) = cone + do j = 2, m-p + u2(1,j) = czero + u2(j,1) = czero + end do + call stdlib_wlacpy( 'L', m-p-1, m-p-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_wungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,work(itaup2), work(iorgqr)& + , lorgqr, childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wlacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) + call stdlib_wunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_wbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & + cdum, 1, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > r ) then + do i = 1, r + iwork(i) = q - r + i + end do + do i = r + 1, q + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_wlapmt( .false., p, q, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_wlapmr( .false., q, q, v1t, ldv1t, iwork ) + end if + end if + else + ! case 4: r = m-q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_wunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & + childinfo ) + ! accumulate householder reflectors + if( wantu2 .and. m-p > 0 ) then + call stdlib_wcopy( m-p, work(iorbdb+p), 1, u2, 1 ) + end if + if( wantu1 .and. p > 0 ) then + call stdlib_wcopy( p, work(iorbdb), 1, u1, 1 ) + do j = 2, p + u1(1,j) = czero + end do + call stdlib_wlacpy( 'L', p-1, m-q-1, x11(2,1), ldx11, u1(2,2),ldu1 ) + call stdlib_wungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + do j = 2, m-p + u2(1,j) = czero + end do + call stdlib_wlacpy( 'L', m-p-1, m-q-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_wungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_wlacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) + call stdlib_wlacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& + 1), ldv1t ) + call stdlib_wlacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) + + call stdlib_wunglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_wbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & + u2, ldu2, u1, ldu1, cdum, 1,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( p > r ) then + do i = 1, r + iwork(i) = p - r + i + end do + do i = r + 1, p + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_wlapmt( .false., p, p, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_wlapmr( .false., p, q, v1t, ldv1t, iwork ) + end if + end if + end if + return + end subroutine stdlib_wuncsd2by1 + + !> ZUNG2L: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. + + pure subroutine stdlib_wung2l( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda ZUNG2R: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. + + pure subroutine stdlib_wung2r( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda ZUNGBR: generates one of the complex unitary matrices Q or P**H + !> determined by ZGEBRD when reducing a complex matrix A to bidiagonal + !> form: A = Q * B * P**H. Q and P**H are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + !> is of order N: + !> if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m + !> rows of P**H, where n >= m >= k; + !> if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as + !> an N-by-N matrix. + + pure subroutine stdlib_wungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantq + integer(ilp) :: i, iinfo, j, lwkopt, mn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + wantq = stdlib_lsame( vect, 'Q' ) + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 .or. ( wantq .and. ( n>m .or. nn .or. m=k ) then + call stdlib_wungqr( m, n, k, a, lda, tau, work, -1, iinfo ) + else + if( m>1 ) then + call stdlib_wungqr( m-1, m-1, m-1, a, lda, tau, work, -1,iinfo ) + end if + end if + else + if( k1 ) then + call stdlib_wunglq( n-1, n-1, n-1, a, lda, tau, work, -1,iinfo ) + end if + end if + end if + lwkopt = real( work( 1 ),KIND=qp) + lwkopt = max (lwkopt, mn) + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZUNGBR', -info ) + return + else if( lquery ) then + work( 1 ) = lwkopt + return + end if + ! quick return if possible + if( m==0 .or. n==0 ) then + work( 1 ) = 1 + return + end if + if( wantq ) then + ! form q, determined by a call to stdlib_wgebrd to reduce an m-by-k + ! matrix + if( m>=k ) then + ! if m >= k, assume m >= n >= k + call stdlib_wungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) + else + ! if m < k, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! column to the right, and set the first row and column of q + ! to those of the unit matrix + do j = m, 2, -1 + a( 1, j ) = czero + do i = j + 1, m + a( i, j ) = a( i, j-1 ) + end do + end do + a( 1, 1 ) = cone + do i = 2, m + a( i, 1 ) = czero + end do + if( m>1 ) then + ! form q(2:m,2:m) + call stdlib_wungqr( m-1, m-1, m-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + else + ! form p**h, determined by a call to stdlib_wgebrd to reduce a k-by-n + ! matrix + if( k= n, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! row downward, and set the first row and column of p**h to + ! those of the unit matrix + a( 1, 1 ) = cone + do i = 2, n + a( i, 1 ) = czero + end do + do j = 2, n + do i = j - 1, 2, -1 + a( i, j ) = a( i-1, j ) + end do + a( 1, j ) = czero + end do + if( n>1 ) then + ! form p**h(2:n,2:n) + call stdlib_wunglq( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wungbr + + !> ZUNGHR: generates a complex unitary matrix Q which is defined as the + !> product of IHI-ILO elementary reflectors of order N, as returned by + !> ZGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_wunghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lwkopt, nb, nh + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda0 ) then + ! generate q(ilo+1:ihi,ilo+1:ihi) + call stdlib_wungqr( nh, nh, nh, a( ilo+1, ilo+1 ), lda, tau( ilo ),work, lwork, & + iinfo ) + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunghr + + !> ZUNGL2: generates an m-by-n complex matrix Q with orthonormal rows, + !> which is defined as the first m rows of a product of k elementary + !> reflectors of order n + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. + + pure subroutine stdlib_wungl2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldak .and. j<=m )a( j, j ) = cone + end do + end if + do i = k, 1, -1 + ! apply h(i)**h to a(i:m,i:n) from the right + if( i ZUNGLQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. + + pure subroutine stdlib_wunglq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZUNGLQ', ' ', m, n, k, -1 ) + lwkopt = max( 1, m )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=m ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_wlarft( 'FORWARD', 'ROWWISE', n-i+1, ib, a( i, i ),lda, tau( i ), & + work, ldwork ) + ! apply h**h to a(i+ib:m,i:n) from the right + call stdlib_wlarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'FORWARD','ROWWISE', m-i-& + ib+1, n-i+1, ib, a( i, i ),lda, work, ldwork, a( i+ib, i ), lda,work( ib+1 ), & + ldwork ) + end if + ! apply h**h to columns i:n of current block + call stdlib_wungl2( ib, n-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set columns 1:i-1 of current block to czero + do j = 1, i - 1 + do l = i, i + ib - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_wunglq + + !> ZUNGQL: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. + + pure subroutine stdlib_wungql( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + if( n-k+i>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_wlarfb( 'LEFT', 'NO TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-& + 1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + ! apply h to rows 1:m-k+i+ib-1 of current block + call stdlib_wung2l( m-k+i+ib-1, ib, ib, a( 1, n-k+i ), lda,tau( i ), work, iinfo & + ) + ! set rows m-k+i+ib:m of current block to czero + do j = n - k + i, n - k + i + ib - 1 + do l = m - k + i + ib, m + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_wungql + + !> ZUNGQR: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. + + pure subroutine stdlib_wungqr( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZUNGQR', ' ', m, n, k, -1 ) + lwkopt = max( 1, n )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=n ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_wlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i & + ), work, ldwork ) + ! apply h to a(i:m,i+ib:n) from the left + call stdlib_wlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-& + i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), & + ldwork ) + end if + ! apply h to rows i:m of current block + call stdlib_wung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set rows 1:i-1 of current block to czero + do j = i, i + ib - 1 + do l = 1, i - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_wungqr + + !> ZUNGR2: generates an m by n complex matrix Q with orthonormal rows, + !> which is defined as the last m rows of a product of k elementary + !> reflectors of order n + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. + + pure subroutine stdlib_wungr2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldan-m .and. j<=n-k )a( m-n+j, j ) = cone + end do + end if + do i = 1, k + ii = m - k + i + ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right + call stdlib_wlacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone + call stdlib_wlarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1 ), lda,conjg( tau( i ) ), a, lda,& + work ) + call stdlib_wscal( n-m+ii-1, -tau( i ), a( ii, 1 ), lda ) + call stdlib_wlacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone - conjg( tau( i ) ) + ! set a(m-k+i,n-k+i+1:n) to czero + do l = n - m + ii + 1, n + a( ii, l ) = czero + end do + end do + return + end subroutine stdlib_wungr2 + + !> ZUNGRQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. + + pure subroutine stdlib_wungrq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + ii = m - k + i + if( ii>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_wlarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-& + 1, n-k+i+ib-1, ib, a( ii, 1 ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork ) + + end if + ! apply h**h to columns 1:n-k+i+ib-1 of current block + call stdlib_wungr2( ib, n-k+i+ib-1, ib, a( ii, 1 ), lda, tau( i ),work, iinfo ) + + ! set columns n-k+i+ib:n of current block to czero + do l = n - k + i + ib, n + do j = ii, ii + ib - 1 + a( j, l ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_wungrq + + !> ZUNGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> ZHETRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_wungtr( uplo, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, j, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + ! generate q(2:n,2:n) + call stdlib_wungqr( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wungtr + + !> ZUNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal + !> columns, which are the first N columns of a product of comlpex unitary + !> matrices of order M which are returned by ZLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for ZLATSQR. + + pure subroutine stdlib_wungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: t(ldt,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + lquery = lwork==-1 + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. m ZUNGTSQR_ROW: generates an M-by-N complex matrix Q_out with + !> orthonormal columns from the output of ZLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by ZLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of ZLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine ZLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which ZLATSQR generates the output blocks. + + pure subroutine stdlib_wungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(in) :: t(ldt,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, & + num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1 + ! Local Arrays + complex(qp) :: dummy(1,1) + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m=m, then the loop is never executed. + if ( mb=m, then we have only one row block of a of size m + ! and we work on the entire matrix a. + mb1 = min( mb, m ) + ! apply column blocks of h in the top row block from right to left. + ! kb is the column index of the current block reflector in + ! the matrices t and v. + do kb = kb_last, 1, -nblocal + ! determine the size of the current column block knb in + ! the matrices t and v. + knb = min( nblocal, n - kb + 1 ) + if( mb1-kb-knb+1==0 ) then + ! in stdlib_dlarfb_gett parameters, when m=0, then the matrix b + ! does not exist, hence we need to pass a dummy array + ! reference dummy(1,1) to b with lddummy=1. + call stdlib_wlarfb_gett( 'N', 0, n-kb+1, knb,t( 1, kb ), ldt, a( kb, kb ), lda,& + dummy( 1, 1 ), 1, work, knb ) + else + call stdlib_wlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1, kb ), ldt, a( kb, & + kb ), lda,a( kb+knb, kb), lda, work, knb ) + end if + end do + work( 1 ) = cmplx( lworkopt,KIND=qp) + return + end subroutine stdlib_wungtsqr_row + + !> ZUNHR_COL: takes an M-by-N complex matrix Q_in with orthonormal columns + !> as input, stored in A, and performs Householder Reconstruction (HR), + !> i.e. reconstructs Householder vectors V(i) implicitly representing + !> another M-by-N matrix Q_out, with the property that Q_in = Q_out*S, + !> where S is an N-by-N diagonal matrix with diagonal entries + !> equal to +1 or -1. The Householder vectors (columns V(i) of V) are + !> stored in A on output, and the diagonal entries of S are stored in D. + !> Block reflectors are also returned in T + !> (same output format as ZGEQRT). + + pure subroutine stdlib_wunhr_col( m, n, nb, a, lda, t, ldt, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*) + complex(qp), intent(out) :: d(*), t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( nb<1 ) then + info = -3 + else if( ldan ) then + call stdlib_wtrsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1 ), lda ) + + end if + ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n) + ! as a sequence of upper-triangular blocks with nb-size column + ! blocking. + ! loop over the column blocks of size nb of the array a(1:m,1:n) + ! and the array t(1:nb,1:n), jb is the column index of a column + ! block, jnb is the column block size at each step jb. + nplusone = n + 1 + do jb = 1, n, nb + ! (2-0) determine the column block size jnb. + jnb = min( nplusone-jb, nb ) + ! (2-1) copy the upper-triangular part of the current jnb-by-jnb + ! diagonal block u(jb) (of the n-by-n matrix u) stored + ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part + ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1) + ! column-by-column, total jnb*(jnb+1)/2 elements. + jbtemp1 = jb - 1 + do j = jb, jb+jnb-1 + call stdlib_wcopy( j-jbtemp1, a( jb, j ), 1, t( 1, j ), 1 ) + end do + ! (2-2) perform on the upper-triangular part of the current + ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored + ! in t(1:jnb,jb:jb+jnb-1) the following operation in place: + ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper- + ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication + ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb + ! diagonal block s(jb) of the n-by-n sign matrix s from the + ! right means changing the sign of each j-th column of the block + ! u(jb) according to the sign of the diagonal element of the block + ! s(jb), i.e. s(j,j) that is stored in the array element d(j). + do j = jb, jb+jnb-1 + if( d( j )==cone ) then + call stdlib_wscal( j-jbtemp1, -cone, t( 1, j ), 1 ) + end if + end do + ! (2-3) perform the triangular solve for the current block + ! matrix x(jb): + ! x(jb) * (a(jb)**t) = b(jb), where: + ! a(jb)**t is a jnb-by-jnb unit upper-triangular + ! coefficient block, and a(jb)=v1(jb), which + ! is a jnb-by-jnb unit lower-triangular block + ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1). + ! the n-by-n matrix v1 is the upper part + ! of the m-by-n lower-trapezoidal matrix v + ! stored in a(1:m,1:n); + ! b(jb) is a jnb-by-jnb upper-triangular right-hand + ! side block, b(jb) = (-1)*u(jb)*s(jb), and + ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1); + ! x(jb) is a jnb-by-jnb upper-triangular solution + ! block, x(jb) is the upper-triangular block + ! reflector t(jb), and x(jb) is stored + ! in t(1:jnb,jb:jb+jnb-1). + ! in other words, we perform the triangular solve for the + ! upper-triangular block t(jb): + ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb). + ! even though the blocks x(jb) and b(jb) are upper- + ! triangular, the routine stdlib_wtrsm will access all jnb**2 + ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore, + ! we need to set to zero the elements of the block + ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call + ! to stdlib_wtrsm. + ! (2-3a) set the elements to zero. + jbtemp2 = jb - 2 + do j = jb, jb+jnb-2 + do i = j-jbtemp2, nb + t( i, j ) = czero + end do + end do + ! (2-3b) perform the triangular solve. + call stdlib_wtrsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1, jb ), & + ldt ) + end do + return + end subroutine stdlib_wunhr_col + + + pure subroutine stdlib_wunm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: m, n, n1, n2, ldq, ldc, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(in) :: q(ldq,*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q; + ! nw is the minimum dimension of work. + if( left ) then + nq = m + else + nq = n + end if + nw = nq + if( n1==0 .or. n2==0 ) nw = 1 + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( n1<0 .or. n1+n2/=nq ) then + info = -5 + else if( n2<0 ) then + info = -6 + else if( ldq ZUNM2L: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_wunm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(qp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNM2R: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_wunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(qp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'C': P**H * C C * P**H + !> Here Q and P**H are the unitary matrices determined by ZGEBRD when + !> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + !> and P**H are defined as products of elementary reflectors H(i) and + !> G(i) respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the unitary matrix Q or P**H that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + + pure subroutine stdlib_wunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: applyq, left, lquery, notran + character :: transt + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + applyq = stdlib_lsame( vect, 'Q' ) + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q or p and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( k<0 ) then + info = -6 + else if( ( applyq .and. lda0 .and. n>0 ) then + if( applyq ) then + if( left ) then + nb = stdlib_ilaenv( 1, 'ZUNMQR', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'ZUNMQR', side // trans, m, n-1, n-1,-1 ) + end if + else + if( left ) then + nb = stdlib_ilaenv( 1, 'ZUNMLQ', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'ZUNMLQ', side // trans, m, n-1, n-1,-1 ) + end if + end if + lwkopt = nw*nb + else + lwkopt = 1 + end if + work( 1 ) = lwkopt + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZUNMBR', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( m==0 .or. n==0 )return + if( applyq ) then + ! apply q + if( nq>=k ) then + ! q was determined by a call to stdlib_wgebrd with nq >= k + call stdlib_wunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & + ) + else if( nq>1 ) then + ! q was determined by a call to stdlib_wgebrd with nq < k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_wunmqr( side, trans, mi, ni, nq-1, a( 2, 1 ), lda, tau,c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + else + ! apply p + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + if( nq>k ) then + ! p was determined by a call to stdlib_wgebrd with nq > k + call stdlib_wunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & + iinfo ) + else if( nq>1 ) then + ! p was determined by a call to stdlib_wgebrd with nq <= k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_wunmlq( side, transt, mi, ni, nq-1, a( 1, 2 ), lda,tau, c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmbr + + !> ZUNMHR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> IHI-ILO elementary reflectors, as returned by ZGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_wunmhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: ihi, ilo, lda, ldc, lwork, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, nh, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + left = stdlib_lsame( side, 'L' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 .or. ilo>max( 1, nq ) ) then + info = -5 + else if( ihinq ) then + info = -6 + else if( lda ZUNML2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_wunml2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(qp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNMLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_wunml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_wlarft( 'FORWARD', 'ROWWISE', nq-i+1, ib, a( i, i ),lda, tau( i ), & + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_wlarfb( side, transt, 'FORWARD', 'ROWWISE', mi, ni, ib,a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmlq + + !> ZUNMQL: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_wunm2l( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'COLUMNWISE', nq-k+i+ib-1, ib,a( 1, i ), lda, & + tau( i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_wlarfb( side, trans, 'BACKWARD', 'COLUMNWISE', mi, ni,ib, a( 1, i ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmql + + !> ZUNMQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_wunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_wlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),& + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_wlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmqr + + !> ZUNMR2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(qp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNMR3: overwrites the general complex m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZTZRZF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, m, n + ! Array Arguments + complex(qp), intent(in) :: a(lda,*), tau(*) + complex(qp), intent(inout) :: c(ldc,*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq + complex(qp) :: taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda ZUNMRQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_wunmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1 ), lda, tau( & + i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_wlarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1 ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmrq + + !> ZUNMRZ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_wunmrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, & + nbmin, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_wunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, iinfo ) + + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + ja = m - l + 1 + else + mi = m + ic = 1 + ja = n - l + 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_wlarzt( 'BACKWARD', 'ROWWISE', l, ib, a( i, ja ), lda,tau( i ), work(& + iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_wlarzb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, l, a( i, ja )& + , lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_wunmrz + + !> ZUNMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by ZHETRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_wunmtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldc, lwork, m, n + ! Array Arguments + complex(qp), intent(inout) :: a(lda,*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery, upper + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda ZUPGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> ZHPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_wupgtr( uplo, n, ap, tau, q, ldq, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, n + ! Array Arguments + complex(qp), intent(in) :: ap(*), tau(*) + complex(qp), intent(out) :: q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, ij, j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldq1 ) then + ! generate q(2:n,2:n) + call stdlib_wung2r( n-1, n-1, n-1, q( 2, 2 ), ldq, tau, work,iinfo ) + end if + end if + return + end subroutine stdlib_wupgtr + + !> ZUPMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by ZHPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_wupmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, m, n + ! Array Arguments + complex(qp), intent(inout) :: ap(*), c(ldc,*) + complex(qp), intent(in) :: tau(*) + complex(qp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: forwrd, left, notran, upper + integer(ilp) :: i, i1, i2, i3, ic, ii, jc, mi, ni, nq + complex(qp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldc=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771 + real(dp), parameter, private :: tsml = rradix**ceiling((minexp-1)*half) + real(dp), parameter, private :: tbig = rradix**floor((maxexp-digits(zero)+1)*half) + real(dp), parameter, private :: ssml = rradix**(-floor((minexp-digits(zero))*half)) + real(dp), parameter, private :: sbig = rradix**(-ceiling((maxexp+digits(zero)-1)*half)) + + + contains + +#:if WITH_QP + !> ZLAG2W: converts a COMPLEX matrix, SA, to a COMPLEX*16 matrix, A. + !> Note that while it is possible to overflow while converting + !> from double to single, it is not possible to overflow when + !> converting from single to double. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_zlag2w( m, n, sa, ldsa, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + complex(dp), intent(in) :: sa(ldsa,*) + complex(qp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Executable Statements + info = 0 + do j = 1, n + do i = 1, m + a( i, j ) = sa( i, j ) + end do + end do + return + end subroutine stdlib_zlag2w +#:endif + + !> ZDRSCL: multiplies an n-element complex vector x by the real scalar + !> 1/a. This is done without overflow or underflow as long as + !> the final result x/a does not overflow or underflow. + + pure subroutine stdlib_zdrscl( n, sa, sx, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(in) :: sa + ! Array Arguments + complex(dp), intent(inout) :: sx(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + real(dp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! quick return if possible + if( n<=0 )return + ! get machine parameters + smlnum = stdlib_dlamch( 'S' ) + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! initialize the denominator to sa and the numerator to 1. + cden = sa + cnum = one + 10 continue + cden1 = cden*smlnum + cnum1 = cnum / bignum + if( abs( cden1 )>abs( cnum ) .and. cnum/=zero ) then + ! pre-multiply x by smlnum if cden is large compared to cnum. + mul = smlnum + done = .false. + cden = cden1 + else if( abs( cnum1 )>abs( cden ) ) then + ! pre-multiply x by bignum if cden is small compared to cnum. + mul = bignum + done = .false. + cnum = cnum1 + else + ! multiply x by cnum / cden and return. + mul = cnum / cden + done = .true. + end if + ! scale the vector x by mul + call stdlib_zdscal( n, mul, sx, incx ) + if( .not.done )go to 10 + return + end subroutine stdlib_zdrscl + + !> ZGBEQU: computes row and column scalings intended to equilibrate an + !> M-by-N band matrix A and reduce its condition number. R returns the + !> row scale factors and C the column scale factors, chosen to try to + !> make the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_zgbequ( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(out) :: c(*), r(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: bignum, rcmax, rcmin, smlnum + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab ZGBEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from ZGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_zgbequb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(out) :: c(*), r(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabzero ) then + r( i ) = radix**int( log( r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = max( j-ku, 1 ), min( j+kl, m ) + c( j ) = max( c( j ), cabs1( ab( kd+i-j, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_zgbequb + + !> ZGBTF2: computes an LU factorization of a complex m-by-n band matrix + !> A using partial pivoting with row interchanges. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jp, ju, km, kv + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in. + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab0 ) then + ! compute multipliers. + call stdlib_zscal( km, cone / ab( kv+1, j ), ab( kv+2, j ), 1 ) + ! update trailing submatrix within the band. + if( ju>j )call stdlib_zgeru( km, ju-j, -cone, ab( kv+2, j ), 1,ab( kv, j+1 ), & + ldab-1, ab( kv+1, j+1 ),ldab-1 ) + end if + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = j + end if + end do loop_40 + return + end subroutine stdlib_zgbtf2 + + !> ZGEBAK: forms the right or left eigenvectors of a complex general + !> matrix by backward transformation on the computed eigenvectors of the + !> balanced matrix output by ZGEBAL. + + pure subroutine stdlib_zgebak( job, side, n, ilo, ihi, scale, m, v, ldv,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: scale(*) + complex(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, ii, k + real(dp) :: s + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! decode and test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( m<0 ) then + info = -7 + else if( ldv=ilo .and. i<=ihi )cycle loop_40 + if( i=ilo .and. i<=ihi )cycle loop_50 + if( i ZGEBAL: balances a general complex matrix A. This involves, first, + !> permuting A by a similarity transformation to isolate eigenvalues + !> in the first 1 to ILO-1 and last IHI+1 to N elements on the + !> diagonal; and second, applying a diagonal similarity transformation + !> to rows and columns ILO to IHI to make the rows and columns as + !> close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrix, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors. + + pure subroutine stdlib_zgebal( job, n, a, lda, ilo, ihi, scale, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(out) :: scale(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sclfac = 2.0e+0_dp + real(dp), parameter :: factor = 0.95e+0_dp + + + + ! Local Scalars + logical(lk) :: noconv + integer(ilp) :: i, ica, iexc, ira, j, k, l, m + real(dp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2 + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=g .or. max( f, c, ca )>=sfmax2 .or.min( r, g, ra )<=sfmin2 )go to 170 + if( stdlib_disnan( c+f+ca+r+g+ra ) ) then + ! exit if nan to avoid infinite loop + info = -3 + call stdlib_xerbla( 'ZGEBAL', -info ) + return + end if + f = f*sclfac + c = c*sclfac + ca = ca*sclfac + r = r / sclfac + g = g / sclfac + ra = ra / sclfac + go to 160 + 170 continue + g = c / sclfac + 180 continue + if( g=sfmax2 .or.min( f, c, g, ca )<=sfmin2 )go to 190 + f = f / sclfac + c = c / sclfac + g = g / sclfac + ca = ca / sclfac + r = r*sclfac + ra = ra*sclfac + go to 180 + ! now balance. + 190 continue + if( ( c+r )>=factor*s )cycle loop_200 + if( fone .and. scale( i )>one ) then + if( scale( i )>=sfmax1 / f )cycle loop_200 + end if + g = one / f + scale( i ) = scale( i )*f + noconv = .true. + call stdlib_zdscal( n-k+1, g, a( i, k ), lda ) + call stdlib_zdscal( l, f, a( 1, i ), 1 ) + end do loop_200 + if( noconv )go to 140 + 210 continue + ilo = k + ihi = l + return + end subroutine stdlib_zgebal + + !> ZGEEQU: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. + !> R(i) and C(j) are restricted to be between SMLNUM = smallest safe + !> number and BIGNUM = largest safe number. Use of these scaling + !> factors is not guaranteed to reduce the condition number of A but + !> works well in practice. + + pure subroutine stdlib_zgeequ( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(out) :: c(*), r(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, rcmax, rcmin, smlnum + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEEQUB: computes row and column scalings intended to equilibrate an + !> M-by-N matrix A and reduce its condition number. R returns the row + !> scale factors and C the column scale factors, chosen to try to make + !> the largest element in each row and column of the matrix B with + !> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most + !> the radix. + !> R(i) and C(j) are restricted to be a power of the radix between + !> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use + !> of these scaling factors is not guaranteed to reduce the condition + !> number of A but works well in practice. + !> This routine differs from ZGEEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled entries' magnitudes are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_zgeequb( m, n, a, lda, r, c, rowcnd, colcnd, amax,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(out) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(out) :: c(*), r(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,log,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldazero ) then + r( i ) = radix**int( log(r( i ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do i = 1, m + rcmax = max( rcmax, r( i ) ) + rcmin = min( rcmin, r( i ) ) + end do + amax = rcmax + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do i = 1, m + if( r( i )==zero ) then + info = i + return + end if + end do + else + ! invert the scale factors. + do i = 1, m + r( i ) = one / min( max( r( i ), smlnum ), bignum ) + end do + ! compute rowcnd = min(r(i)) / max(r(i)). + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + ! compute column scale factors. + do j = 1, n + c( j ) = zero + end do + ! find the maximum element in each column, + ! assuming the row scaling computed above. + do j = 1, n + do i = 1, m + c( j ) = max( c( j ), cabs1( a( i, j ) )*r( i ) ) + end do + if( c( j )>zero ) then + c( j ) = radix**int( log( c( j ) ) / logrdx,KIND=ilp) + end if + end do + ! find the maximum and minimum scale factors. + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin==zero ) then + ! find the first zero scale factor and return an error code. + do j = 1, n + if( c( j )==zero ) then + info = m + j + return + end if + end do + else + ! invert the scale factors. + do j = 1, n + c( j ) = one / min( max( c( j ), smlnum ), bignum ) + end do + ! compute colcnd = min(c(j)) / max(c(j)). + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + end if + return + end subroutine stdlib_zgeequb + + !> ZGETC2: computes an LU factorization, using complete pivoting, of the + !> n-by-n matrix A. The factorization has the form A = P * L * U * Q, + !> where P and Q are permutation matrices, L is lower triangular with + !> unit diagonal elements and U is upper triangular. + !> This is a level 1 BLAS version of the algorithm. + + pure subroutine stdlib_zgetc2( n, a, lda, ipiv, jpiv, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*), jpiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ip, ipv, j, jp, jpv + real(dp) :: bignum, eps, smin, smlnum, xmax + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + ! set constants to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! handle the case n=1 by itself + if( n==1 ) then + ipiv( 1 ) = 1 + jpiv( 1 ) = 1 + if( abs( a( 1, 1 ) )=xmax ) then + xmax = abs( a( ip, jp ) ) + ipv = ip + jpv = jp + end if + end do + end do + if( i==1 )smin = max( eps*xmax, smlnum ) + ! swap rows + if( ipv/=i )call stdlib_zswap( n, a( ipv, 1 ), lda, a( i, 1 ), lda ) + ipiv( i ) = ipv + ! swap columns + if( jpv/=i )call stdlib_zswap( n, a( 1, jpv ), 1, a( 1, i ), 1 ) + jpiv( i ) = jpv + ! check for singularity + if( abs( a( i, i ) ) ZGETF2: computes an LU factorization of a general m-by-n matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_zgetf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: sfmin + integer(ilp) :: i, j, jp + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_zscal( m-j, cone / a( j, j ), a( j+1, j ), 1 ) + else + do i = 1, m-j + a( j+i, j ) = a( j+i, j ) / a( j, j ) + end do + end if + end if + else if( info==0 ) then + info = j + end if + if( j ZGGBAK: forms the right or left eigenvectors of a complex generalized + !> eigenvalue problem A*x = lambda*B*x, by backward transformation on + !> the computed eigenvectors of the balanced pair of matrices output by + !> ZGGBAL. + + pure subroutine stdlib_zggbak( job, side, n, ilo, ihi, lscale, rscale, m, v,ldv, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job, side + integer(ilp), intent(in) :: ihi, ilo, ldv, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(in) :: lscale(*), rscale(*) + complex(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: leftv, rightv + integer(ilp) :: i, k + ! Intrinsic Functions + intrinsic :: max,int + ! Executable Statements + ! test the input parameters + rightv = stdlib_lsame( side, 'R' ) + leftv = stdlib_lsame( side, 'L' ) + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( .not.rightv .and. .not.leftv ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( n==0 .and. ihi==0 .and. ilo/=1 ) then + info = -4 + else if( n>0 .and. ( ihimax( 1, n ) ) )then + info = -5 + else if( n==0 .and. ilo==1 .and. ihi/=0 ) then + info = -5 + else if( m<0 ) then + info = -8 + else if( ldv ZGGBAL: balances a pair of general complex matrices (A,B). This + !> involves, first, permuting A and B by similarity transformations to + !> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N + !> elements on the diagonal; and second, applying a diagonal similarity + !> transformation to rows and columns ILO to IHI to make the rows + !> and columns as close in norm as possible. Both steps are optional. + !> Balancing may reduce the 1-norm of the matrices, and improve the + !> accuracy of the computed eigenvalues and/or eigenvectors in the + !> generalized eigenvalue problem A*x = lambda*B*x. + + pure subroutine stdlib_zggbal( job, n, a, lda, b, ldb, ilo, ihi, lscale,rscale, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: job + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, n + ! Array Arguments + real(dp), intent(out) :: lscale(*), rscale(*), work(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sclfac = 1.0e+1_dp + + + + ! Local Scalars + integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, & + lrab, lsfmax, lsfmin, m, nr, nrp2 + real(dp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, & + pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log10,max,min,sign + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.stdlib_lsame( job, 'N' ) .and. .not.stdlib_lsame( job, 'P' ) & + .and..not.stdlib_lsame( job, 'S' ) .and. .not.stdlib_lsame( job, 'B' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldacmax )cmax = abs( cor ) + lscale( i ) = lscale( i ) + cor + cor = alpha*work( i ) + if( abs( cor )>cmax )cmax = abs( cor ) + rscale( i ) = rscale( i ) + cor + end do + if( cmax ZGTSV: solves the equation + !> A*X = B, + !> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with + !> partial pivoting. + !> Note that the equation A**T *X = B may be solved by interchanging the + !> order of the arguments DU and DL. + + pure subroutine stdlib_zgtsv( n, nrhs, dl, d, du, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: b(ldb,*), d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k + complex(dp) :: mult, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb=cabs1( dl( k ) ) ) then + ! no row interchange required + mult = dl( k ) / d( k ) + d( k+1 ) = d( k+1 ) - mult*du( k ) + do j = 1, nrhs + b( k+1, j ) = b( k+1, j ) - mult*b( k, j ) + end do + if( k<( n-1 ) )dl( k ) = czero + else + ! interchange rows k and k+1 + mult = d( k ) / dl( k ) + d( k ) = dl( k ) + temp = d( k+1 ) + d( k+1 ) = du( k ) - mult*temp + if( k<( n-1 ) ) then + dl( k ) = du( k+1 ) + du( k+1 ) = -mult*dl( k ) + end if + du( k ) = temp + do j = 1, nrhs + temp = b( k, j ) + b( k, j ) = b( k+1, j ) + b( k+1, j ) = temp - mult*b( k+1, j ) + end do + end if + end do loop_30 + if( d( n )==czero ) then + info = n + return + end if + ! back solve with the matrix u from the factorization. + do j = 1, nrhs + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) / d( n-1 ) + do k = n - 2, 1, -1 + b( k, j ) = ( b( k, j )-du( k )*b( k+1, j )-dl( k )*b( k+2, j ) ) / d( k ) + + end do + end do + return + end subroutine stdlib_zgtsv + + !> ZGTTRF: computes an LU factorization of a complex tridiagonal matrix A + !> using elimination with partial pivoting and row interchanges. + !> The factorization has the form + !> A = L * U + !> where L is a product of permutation and unit lower bidiagonal + !> matrices and U is upper triangular with nonzeros in only the main + !> diagonal and first two superdiagonals. + + pure subroutine stdlib_zgttrf( n, dl, d, du, du2, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: d(*), dl(*), du(*) + complex(dp), intent(out) :: du2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: fact, temp, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'ZGTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! initialize ipiv(i) = i and du2(i) = 0 + do i = 1, n + ipiv( i ) = i + end do + do i = 1, n - 2 + du2( i ) = zero + end do + do i = 1, n - 2 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + ! no row interchange required, eliminate dl(i) + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + ! interchange rows i and i+1, eliminate dl(i) + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + du2( i ) = du( i+1 ) + du( i+1 ) = -fact*du( i+1 ) + ipiv( i ) = i + 1 + end if + end do + if( n>1 ) then + i = n - 1 + if( cabs1( d( i ) )>=cabs1( dl( i ) ) ) then + if( cabs1( d( i ) )/=zero ) then + fact = dl( i ) / d( i ) + dl( i ) = fact + d( i+1 ) = d( i+1 ) - fact*du( i ) + end if + else + fact = d( i ) / dl( i ) + d( i ) = dl( i ) + dl( i ) = fact + temp = du( i ) + du( i ) = d( i+1 ) + d( i+1 ) = temp - fact*d( i+1 ) + ipiv( i ) = i + 1 + end if + end if + ! check for a zero on the diagonal of u. + do i = 1, n + if( cabs1( d( i ) )==zero ) then + info = i + go to 50 + end if + end do + 50 continue + return + end subroutine stdlib_zgttrf + + !> ZGTTS2: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by ZGTTRF. + + pure subroutine stdlib_zgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: itrans, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + complex(dp) :: temp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n==0 .or. nrhs==0 )return + if( itrans==0 ) then + ! solve a*x = b using the lu factorization of a, + ! overwriting each right hand side vector with its solution. + if( nrhs<=1 ) then + j = 1 + 10 continue + ! solve l*x = b. + do i = 1, n - 1 + if( ipiv( i )==i ) then + b( i+1, j ) = b( i+1, j ) - dl( i )*b( i, j ) + else + temp = b( i, j ) + b( i, j ) = b( i+1, j ) + b( i+1, j ) = temp - dl( i )*b( i, j ) + end if + end do + ! solve u*x = b. + b( n, j ) = b( n, j ) / d( n ) + if( n>1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + if( j1 )b( n-1, j ) = ( b( n-1, j )-du( n-1 )*b( n, j ) ) /d( n-1 ) + do i = n - 2, 1, -1 + b( i, j ) = ( b( i, j )-du( i )*b( i+1, j )-du2( i )*b( i+2, j ) ) / d( i ) + + end do + end do + end if + else if( itrans==1 ) then + ! solve a**t * x = b. + if( nrhs<=1 ) then + j = 1 + 70 continue + ! solve u**t * x = b. + b( 1, j ) = b( 1, j ) / d( 1 ) + if( n>1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d( i & + ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-du( 1 )*b( 1, j ) ) / d( 2 ) + do i = 3, n + b( i, j ) = ( b( i, j )-du( i-1 )*b( i-1, j )-du2( i-2 )*b( i-2, j ) ) / d(& + i ) + end do + ! solve l**t * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - dl( i )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - dl( i )*temp + b( i, j ) = temp + end if + end do + end do + end if + else + ! solve a**h * x = b. + if( nrhs<=1 ) then + j = 1 + 130 continue + ! solve u**h * x = b. + b( 1, j ) = b( 1, j ) / conjg( d( 1 ) ) + if( n>1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) ) /conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )*b( & + i-2, j ) ) /conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + if( j1 )b( 2, j ) = ( b( 2, j )-conjg( du( 1 ) )*b( 1, j ) )/ conjg( d( 2 ) ) + + do i = 3, n + b( i, j ) = ( b( i, j )-conjg( du( i-1 ) )*b( i-1, j )-conjg( du2( i-2 ) )& + *b( i-2, j ) ) / conjg( d( i ) ) + end do + ! solve l**h * x = b. + do i = n - 1, 1, -1 + if( ipiv( i )==i ) then + b( i, j ) = b( i, j ) - conjg( dl( i ) )*b( i+1, j ) + else + temp = b( i+1, j ) + b( i+1, j ) = b( i, j ) - conjg( dl( i ) )*temp + b( i, j ) = temp + end if + end do + end do + end if + end if + end subroutine stdlib_zgtts2 + + !> ZHESWAPR: applies an elementary permutation on the rows and the columns of + !> a hermitian matrix. + + pure subroutine stdlib_zheswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(dp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_zswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=conjg(a(i1+i,i2)) + a(i1+i,i2)=conjg(tmp) + end do + a(i1,i2)=conjg(a(i1,i2)) + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from 1 to i1-1 + call stdlib_zswap ( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + ! - swap a(i2,i1) and a(i1,i2) + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=conjg(a(i2,i1+i)) + a(i2,i1+i)=conjg(tmp) + end do + a(i2,i1)=conjg(a(i2,i1)) + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_zheswapr + + !> ZHETF2: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zhetf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(dp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(dp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) .or. stdlib_disnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=dp) + else + ! ============================================================ + ! test for interchange + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine only rowmax. + jmax = imax + stdlib_izamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=dp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + a( kp, kk ) = conjg( a( kp, kk ) ) + r1 = real( a( kk, kk ),KIND=dp) + a( kk, kk ) = real( a( kp, kp ),KIND=dp) + a( kp, kp ) = r1 + if( kstep==2 ) then + a( k, k ) = real( a( k, k ),KIND=dp) + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + a( k, k ) = real( a( k, k ),KIND=dp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=dp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( a( k, k ),KIND=dp) + call stdlib_zher( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zdscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_dlapy2( real( a( k-1, k ),KIND=dp),aimag( a( k-1, k ) ) ) + + d22 = real( a( k-1, k-1 ),KIND=dp) / d + d11 = real( a( k, k ),KIND=dp) / d + tt = one / ( d11*d22-one ) + d12 = a( k-1, k ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = d*( d22*a( j, k )-d12*a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*conjg( wk ) -a( i, k-1 )*conjg( & + wkm1 ) + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + a( j, j ) = cmplx( real( a( j, j ),KIND=dp), zero,KIND=dp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 50 continue + ! if k > n, exit from loop + if( k>n )go to 90 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine only rowmax. + jmax = k - 1 + stdlib_izamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( a( imax, imax ),KIND=dp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZHETF2_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_zhetf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + ! ====================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(dp) :: absakk, alpha, colmax, d, d11, d22, r1, dtemp, rowmax, tt, sfmin + complex(dp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=dp) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=dp) )1 )call stdlib_zswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=dp) + a( k, k ) = real( a( p, p ),KIND=dp) + a( p, p ) = r1 + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=dp) + a( kk, kk ) = real( a( kp, kp ),KIND=dp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=dp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=dp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=dp) + call stdlib_zher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zdscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=dp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_dlapy2( real( a( k-1, k ),KIND=dp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=dp) + d22 = real( a( k-1, k-1 ) / d,KIND=dp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=dp), zero,KIND=dp) + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**h using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=dp) )1 )call stdlib_zswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the trailing submatrix a(k:n,k:n) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=dp) + if( kstep==2 )a( k+1, k+1 ) = real( a( k+1, k+1 ),KIND=dp) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of a now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=dp) + call stdlib_zher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_zdscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=dp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZHETF2_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**H is the conjugate transpose of U, and D is + !> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zhetf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ====================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: done, upper + integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p + real(dp) :: absakk, alpha, colmax, d, d11, d22, r1, dtemp, rowmax, tt, sfmin + complex(dp) :: d12, d21, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( ( max( absakk, colmax )==zero ) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=dp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=dp) )1 )call stdlib_zswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! (2) swap and conjugate middle parts + do j = p + 1, k - 1 + t = conjg( a( j, k ) ) + a( j, k ) = conjg( a( p, j ) ) + a( p, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( p, k ) = conjg( a( p, k ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( k, k ),KIND=dp) + a( k, k ) = real( a( p, p ),KIND=dp) + a( p, p ) = r1 + end if + ! for both 1x1 and 2x2 pivots, interchange rows and + ! columns kk and kp in the leading submatrix a(1:k,1:k) + if( kp/=kk ) then + ! (1) swap columnar parts + if( kp>1 )call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! (2) swap and conjugate middle parts + do j = kp + 1, kk - 1 + t = conjg( a( j, kk ) ) + a( j, kk ) = conjg( a( kp, j ) ) + a( kp, j ) = t + end do + ! (3) swap and conjugate corner elements at row-col interserction + a( kp, kk ) = conjg( a( kp, kk ) ) + ! (4) swap diagonal elements at row-col intersection + r1 = real( a( kk, kk ),KIND=dp) + a( kk, kk ) = real( a( kp, kp ),KIND=dp) + a( kp, kp ) = r1 + if( kstep==2 ) then + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=dp) + ! (5) swap row elements + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + else + ! (*) make sure that diagonal element of pivot is real + a( k, k ) = real( a( k, k ),KIND=dp) + if( kstep==2 )a( k-1, k-1 ) = real( a( k-1, k-1 ),KIND=dp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( abs( real( a( k, k ),KIND=dp) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = one / real( a( k, k ),KIND=dp) + call stdlib_zher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zdscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=dp) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zher( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + ! d = |a12| + d = stdlib_dlapy2( real( a( k-1, k ),KIND=dp),aimag( a( k-1, k ) ) ) + + d11 = real( a( k, k ) / d,KIND=dp) + d22 = real( a( k-1, k-1 ) / d,KIND=dp) + d12 = a( k-1, k ) / d + tt = one / ( d11*d22-one ) + do j = k - 2, 1, -1 + ! compute d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j + wkm1 = tt*( d11*a( j, k-1 )-conjg( d12 )*a( j, k ) ) + wk = tt*( d22*a( j, k )-d12*a( j, k-1 ) ) + ! perform a rank-2 update of a(1:k-2,1:k-2) + do i = j, 1, -1 + a( i, j ) = a( i, j ) -( a( i, k ) / d )*conjg( wk ) -( a( i, k-1 ) & + / d )*conjg( wkm1 ) + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d + a( j, k-1 ) = wkm1 / d + ! (*) make sure that diagonal element of pivot is real + a( j, j ) = cmplx( real( a( j, j ),KIND=dp), zero,KIND=dp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( a( k, k ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( a( imax, imax ),KIND=dp) )=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = one / real( a( k, k ),KIND=dp) + call stdlib_zher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_zdscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = real( a( k, k ),KIND=dp) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zher( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZHETRI: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> ZHETRF. + + pure subroutine stdlib_zhetri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(dp) :: ak, akp1, d, t + complex(dp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=dp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_zdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=dp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=dp) / t + akp1 = real( a( k+1, k+1 ),KIND=dp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_zdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=dp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_zdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_zcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_zdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=dp) + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=dp) + ! compute column k of the inverse. + if( k ZHETRI_ROOK: computes the inverse of a complex Hermitian indefinite matrix + !> A using the factorization A = U*D*U**H or A = L*D*L**H computed by + !> ZHETRF_ROOK. + + pure subroutine stdlib_zhetri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp, kstep + real(dp) :: ak, akp1, d, t + complex(dp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 70 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=dp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_zdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=dp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( a( k, k+1 ) ) + ak = real( a( k, k ),KIND=dp) / t + akp1 = real( a( k+1, k+1 ),KIND=dp) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-one ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - real( stdlib_zdotc( k-1, work, 1, a( 1,k ), 1 ),& + KIND=dp) + a( k, k+1 ) = a( k, k+1 ) -stdlib_zdotc( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_zcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_zhemv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -real( stdlib_zdotc( k-1, work, 1, a( 1, k+1 ),& + 1 ),KIND=dp) + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k,1:k) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1) in the leading submatrix a(k+1:n,k+1:n) + ! (1) interchange rows and columns k and -ipiv(k) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + ! (2) interchange rows and columns k+1 and -ipiv(k+1) + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + do j = kp + 1, k - 1 + temp = conjg( a( j, k ) ) + a( j, k ) = conjg( a( kp, j ) ) + a( kp, j ) = temp + end do + a( kp, k ) = conjg( a( kp, k ) ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 70 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 80 continue + ! if k < 1, exit from loop. + if( k<1 )go to 120 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = one / real( a( k, k ),KIND=dp) + ! compute column k of the inverse. + if( k ZHETRS_3: solves a system of linear equations A * X = B with a complex + !> Hermitian matrix A using the factorization computed + !> by ZHETRF_RK or ZHETRF_BK: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_zhetrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), e(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + real(dp) :: s + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_ztrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=dp) / real( a( i, i ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'U', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + s = real( cone,KIND=dp) / real( a( i, i ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( i, 1 ), ldb ) + else if( i b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_ztrsm('L', 'L', 'C', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_zhetrs_3 + + !> Level 3 BLAS like routine for C in RFP Format. + !> ZHFRK: performs one of the Hermitian rank--k operations + !> C := alpha*A*A**H + beta*C, + !> or + !> C := alpha*A**H*A + beta*C, + !> where alpha and beta are real scalars, C is an n--by--n Hermitian + !> matrix and A is an n--by--k matrix in the first case and a k--by--n + !> matrix in the second case. + + pure subroutine stdlib_zhfrk( transr, uplo, trans, n, k, alpha, a, lda, beta,c ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: k, lda, n + character, intent(in) :: trans, transr, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: c(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, normaltransr, nisodd, notrans + integer(ilp) :: info, nrowa, j, nk, n1, n2 + complex(dp) :: calpha, cbeta + ! Intrinsic Functions + intrinsic :: max,cmplx + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( notrans ) then + nrowa = n + else + nrowa = k + end if + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( lda ZHPGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form, using packed storage. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by ZPPTRF. + + pure subroutine stdlib_zhpgst( itype, uplo, n, ap, bp, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, n + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: bp(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk + real(dp) :: ajj, akk, bjj, bkk + complex(dp) :: ct + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPGST', -info ) + return + end if + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + ! j1 and jj are the indices of a(1,j) and a(j,j) + jj = 0 + do j = 1, n + j1 = jj + 1 + jj = jj + j + ! compute the j-th column of the upper triangle of a + ap( jj ) = real( ap( jj ),KIND=dp) + bjj = real( bp( jj ),KIND=dp) + call stdlib_ztpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1 & + ) + call stdlib_zhpmv( uplo, j-1, -cone, ap, bp( j1 ), 1, cone,ap( j1 ), 1 ) + + call stdlib_zdscal( j-1, one / bjj, ap( j1 ), 1 ) + ap( jj ) = ( ap( jj )-stdlib_zdotc( j-1, ap( j1 ), 1, bp( j1 ),1 ) ) / & + bjj + end do + else + ! compute inv(l)*a*inv(l**h) + ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1) + kk = 1 + do k = 1, n + k1k1 = kk + n - k + 1 + ! update the lower triangle of a(k:n,k:n) + akk = real( ap( kk ),KIND=dp) + bkk = real( bp( kk ),KIND=dp) + akk = akk / bkk**2 + ap( kk ) = akk + if( k ZHPTRF: computes the factorization of a complex Hermitian packed + !> matrix A using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_zhptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(dp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt + complex(dp) :: d12, d21, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**h using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc+k-1 ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_izamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=dp) + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_izamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc+imax-1 ),KIND=dp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_zswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = conjg( ap( knc+j-1 ) ) + ap( knc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = t + end do + ap( kx+kk-1 ) = conjg( ap( kx+kk-1 ) ) + r1 = real( ap( knc+kk-1 ),KIND=dp) + ap( knc+kk-1 ) = real( ap( kpc+kp-1 ),KIND=dp) + ap( kpc+kp-1 ) = r1 + if( kstep==2 ) then + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=dp) + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + else + ap( kc+k-1 ) = real( ap( kc+k-1 ),KIND=dp) + if( kstep==2 )ap( kc-1 ) = real( ap( kc-1 ),KIND=dp) + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h + r1 = one / real( ap( kc+k-1 ),KIND=dp) + call stdlib_zhpr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_zdscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h + if( k>2 ) then + d = stdlib_dlapy2( real( ap( k-1+( k-1 )*k / 2 ),KIND=dp),aimag( ap( k-1+( & + k-1 )*k / 2 ) ) ) + d22 = real( ap( k-1+( k-2 )*( k-1 ) / 2 ),KIND=dp) / d + d11 = real( ap( k+( k-1 )*k / 2 ),KIND=dp) / d + tt = one / ( d11*d22-one ) + d12 = ap( k-1+( k-1 )*k / 2 ) / d + d = tt / d + do j = k - 2, 1, -1 + wkm1 = d*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-conjg( d12 )*ap( j+( k-1 )*k & + / 2 ) ) + wk = d*( d22*ap( j+( k-1 )*k / 2 )-d12*ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *conjg( wk ) -ap( i+( k-2 )*( k-1 ) / 2 )*conjg( wkm1 ) + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + ap( j+( j-1 )*j / 2 ) = cmplx( real( ap( j+( j-1 )*j / 2 ),KIND=dp), & + zero,KIND=dp) + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( ap( kc ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( abs( real( ap( kpc ),KIND=dp) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZHPTRI: computes the inverse of a complex Hermitian indefinite matrix + !> A in packed storage using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHPTRF. + + pure subroutine stdlib_zhptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + real(dp) :: ak, akp1, d, t + complex(dp) :: akkp1, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = one / real( ap( kc+k-1 ),KIND=dp) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_zhpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_zdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=dp) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = abs( ap( kcnext+k-1 ) ) + ak = real( ap( kc+k-1 ),KIND=dp) / t + akp1 = real( ap( kcnext+k ),KIND=dp) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-one ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_zhpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kc ), 1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -real( stdlib_zdotc( k-1, work, 1, ap( kc ), 1 ),& + KIND=dp) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_zdotc( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_zcopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_zhpmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -real( stdlib_zdotc( k-1, work, 1, ap( kcnext & + ),1 ),KIND=dp) + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_zswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = conjg( ap( kc+j-1 ) ) + ap( kc+j-1 ) = conjg( ap( kx ) ) + ap( kx ) = temp + end do + ap( kc+kp-1 ) = conjg( ap( kc+kp-1 ) ) + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**h. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = one / real( ap( kc ),KIND=dp) + ! compute column k of the inverse. + if( k ZLA_GBAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_zla_gbamv( trans, m, n, kl, ku, alpha, ab, ldab, x,incx, beta, y, incy ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, ldab, m, n, kl, ku, trans + ! Array Arguments + complex(dp), intent(in) :: ab(ldab,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( kl<0 .or. kl>m-1 ) then + info = 4 + else if( ku<0 .or. ku>n-1 ) then + info = 5 + else if( ldab0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + kd = ku + 1 + ke = kl + 1 + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( kd+i-j, j ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = max( i-kl, 1 ), min( i+ku, lenx ) + temp = cabs1( ab( ke-i+j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_zla_gbamv + + !> ZLA_GBRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(dp) function stdlib_zla_gbrpvgrw( n, kl, ku, ncols, ab,ldab, afb, ldafb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, kl, ku, ncols, ldab, ldafb + ! Array Arguments + complex(dp), intent(in) :: ab(ldab,*), afb(ldafb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j, kd + real(dp) :: amax, umax, rpvgrw + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + kd = ku + 1 + do j = 1, ncols + amax = zero + umax = zero + do i = max( j-ku, 1 ), min( j+kl, n ) + amax = max( cabs1( ab( kd+i-j, j ) ), amax ) + end do + do i = max( j-ku, 1 ), j + umax = max( cabs1( afb( kd+i-j, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_zla_gbrpvgrw = rpvgrw + end function stdlib_zla_gbrpvgrw + + !> ZLA_GEAMV: performs one of the matrix-vector operations + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> m by n matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_zla_geamv( trans, m, n, alpha, a, lda, x, incx, beta,y, incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, m, n + integer(ilp), intent(in) :: trans + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: max,abs,real,aimag,sign + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not.( ( trans==stdlib_ilatrans( 'N' ) ).or. ( trans==stdlib_ilatrans( 'T' ) )& + .or. ( trans==stdlib_ilatrans( 'C' ) ) ) ) then + info = 1 + else if( m<0 )then + info = 2 + else if( n<0 )then + info = 3 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( lenx - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( leny - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(m*n) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if( trans==stdlib_ilatrans( 'N' ) )then + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, leny + if ( beta == czero ) then + symb_zero = .true. + y( iy ) = czero + else if ( y( iy ) == czero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= czero ) then + jx = kx + do j = 1, lenx + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( jx ) == czero .or. temp == czero ) + + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero ) y( iy ) =y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_zla_geamv + + !> ZLA_GERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + pure real(dp) function stdlib_zla_gerpvgrw( n, ncols, a, lda, af,ldaf ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, ncols, lda, ldaf + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: amax, umax, rpvgrw + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: max,min,abs,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + rpvgrw = one + do j = 1, ncols + amax = zero + umax = zero + do i = 1, n + amax = max( cabs1( a( i, j ) ), amax ) + end do + do i = 1, j + umax = max( cabs1( af( i, j ) ), umax ) + end do + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + stdlib_zla_gerpvgrw = rpvgrw + end function stdlib_zla_gerpvgrw + + !> ZLA_SYAMV performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_zla_heamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n, uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if (.not.symb_zero)y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_zla_heamv + + !> ZLA_LIN_BERR: computes componentwise relative backward error from + !> the formula + !> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) + !> where abs(Z) is the componentwise absolute value of the matrix + !> or vector Z. + + pure subroutine stdlib_zla_lin_berr( n, nz, nrhs, res, ayb, berr ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n, nz, nrhs + ! Array Arguments + real(dp), intent(in) :: ayb(n,nrhs) + real(dp), intent(out) :: berr(nrhs) + complex(dp), intent(in) :: res(n,nrhs) + ! ===================================================================== + ! Local Scalars + real(dp) :: tmp,safe1 + integer(ilp) :: i, j + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + complex(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! adding safe1 to the numerator guards against spuriously zero + ! residuals. a similar safeguard is in the cla_yyamv routine used + ! to compute ayb. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (nz+1)*safe1 + do j = 1, nrhs + berr(j) = zero + do i = 1, n + if (ayb(i,j) /= zero) then + tmp = (safe1 + cabs1(res(i,j)))/ayb(i,j) + berr(j) = max( berr(j), tmp ) + end if + ! if ayb is exactly 0.0_dp (and if computed by cla_yyamv), then we know + ! the true residual also must be exactly zero. + end do + end do + end subroutine stdlib_zla_lin_berr + + !> ZLA_PORPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(dp) function stdlib_zla_porpvgrw( uplo, ncols, a, lda, af,ldaf, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: ncols, lda, ldaf + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: amax, umax, rpvgrw + logical(lk) :: upper + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,max,min,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + ! stdlib_dpotrf will have factored only the ncolsxncols leading minor, so + ! we restrict the growth search to that minor and use only the first + ! 2*ncols workspace entries. + rpvgrw = one + do i = 1, 2*ncols + work( i ) = zero + end do + ! find the max magnitude entry of each column. + if ( upper ) then + do j = 1, ncols + do i = 1, j + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( ncols+j ) =max( cabs1( a( i, j ) ), work( ncols+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of the factor in + ! af. no pivoting, so no permutations. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do j = 1, ncols + do i = 1, j + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + else + do j = 1, ncols + do i = j, ncols + work( j ) = max( cabs1( af( i, j ) ), work( j ) ) + end do + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( stdlib_lsame( 'UPPER', uplo ) ) then + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( ncols+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_zla_porpvgrw = rpvgrw + end function stdlib_zla_porpvgrw + + !> ZLA_SYAMV: performs the matrix-vector operation + !> y := alpha*abs(A)*abs(x) + beta*abs(y), + !> where alpha and beta are scalars, x and y are vectors and A is an + !> n by n symmetric matrix. + !> This function is primarily used in calculating error bounds. + !> To protect against underflow during evaluation, components in + !> the resulting vector are perturbed away from zero by (N+1) + !> times the underflow threshold. To prevent unnecessarily large + !> errors for block-structure embedded in general matrices, + !> "symbolically" zero components are not perturbed. A zero + !> entry is considered "symbolic" if all multiplications involved + !> in computing that entry have at least one zero multiplicand. + + subroutine stdlib_zla_syamv( uplo, n, alpha, a, lda, x, incx, beta, y,incy ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: alpha, beta + integer(ilp), intent(in) :: incx, incy, lda, n + integer(ilp), intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + real(dp), intent(inout) :: y(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: symb_zero + real(dp) :: temp, safe1 + integer(ilp) :: i, info, iy, j, jx, kx, ky + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: max,abs,sign,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( uplo/=stdlib_ilauplo( 'U' ) .and.uplo/=stdlib_ilauplo( 'L' ) )then + info = 1 + else if( n<0 )then + info = 2 + else if( lda0 )then + kx = 1 + else + kx = 1 - ( n - 1 )*incx + end if + if( incy>0 )then + ky = 1 + else + ky = 1 - ( n - 1 )*incy + end if + ! set safe1 essentially to be the underflow threshold times the + ! number of additions in each row. + safe1 = stdlib_dlamch( 'SAFE MINIMUM' ) + safe1 = (n+1)*safe1 + ! form y := alpha*abs(a)*abs(x) + beta*abs(y). + ! the o(n^2) symb_zero tests could be replaced by o(n) queries to + ! the inexact flag. still doesn't help change the iteration order + ! to per-column. + iy = ky + if ( incx==1 ) then + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( j ) )*temp + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + else + if ( uplo == stdlib_ilauplo( 'U' ) ) then + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + else + do i = 1, n + if ( beta == zero ) then + symb_zero = .true. + y( iy ) = zero + else if ( y( iy ) == zero ) then + symb_zero = .true. + else + symb_zero = .false. + y( iy ) = beta * abs( y( iy ) ) + end if + jx = kx + if ( alpha /= zero ) then + do j = 1, i + temp = cabs1( a( i, j ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + do j = i+1, n + temp = cabs1( a( j, i ) ) + symb_zero = symb_zero .and.( x( j ) == zero .or. temp == zero ) + y( iy ) = y( iy ) + alpha*cabs1( x( jx ) )*temp + jx = jx + incx + end do + end if + if ( .not.symb_zero )y( iy ) = y( iy ) + sign( safe1, y( iy ) ) + iy = iy + incy + end do + end if + end if + return + end subroutine stdlib_zla_syamv + + !> ZLA_WWADDW: adds a vector W into a doubled-single vector (X, Y). + !> This works for all extant IBM's hex and binary floating point + !> arithmetic, but not for decimal. + + pure subroutine stdlib_zla_wwaddw( n, x, y, w ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: x(*), y(*) + complex(dp), intent(in) :: w(*) + ! ===================================================================== + ! Local Scalars + complex(dp) :: s + integer(ilp) :: i + ! Executable Statements + do 10 i = 1, n + s = x(i) + w(i) + s = (s + s) - s + y(i) = ((x(i) - s) + w(i)) + y(i) + x(i) = s + 10 continue + return + end subroutine stdlib_zla_wwaddw + + !> ZLACGV: conjugates a complex vector of length N. + + pure subroutine stdlib_zlacgv( n, x, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + ! Array Arguments + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ioff + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( incx==1 ) then + do i = 1, n + x( i ) = conjg( x( i ) ) + end do + else + ioff = 1 + if( incx<0 )ioff = 1 - ( n-1 )*incx + do i = 1, n + x( ioff ) = conjg( x( ioff ) ) + ioff = ioff + incx + end do + end if + return + end subroutine stdlib_zlacgv + + !> ZLACN2: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + pure subroutine stdlib_zlacn2( n, v, x, est, kase, isave ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est + ! Array Arguments + integer(ilp), intent(inout) :: isave(3) + complex(dp), intent(out) :: v(*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, jlast + real(dp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag + ! Executable Statements + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=dp),KIND=dp) + end do + kase = 1 + isave( 1 ) = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )isave( 1 ) + ! ................ entry (isave( 1 ) = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_dzsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=dp) / absxi,aimag( x( i ) ) / absxi,KIND=dp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 2 + return + ! ................ entry (isave( 1 ) = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + isave( 2 ) = stdlib_izmax1( n, x, 1 ) + isave( 3 ) = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( isave( 2 ) ) = cone + kase = 1 + isave( 1 ) = 3 + return + ! ................ entry (isave( 1 ) = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_zcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_dzsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=dp) / absxi,aimag( x( i ) ) / absxi,KIND=dp) + + else + x( i ) = cone + end if + end do + kase = 2 + isave( 1 ) = 4 + return + ! ................ entry (isave( 1 ) = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = isave( 2 ) + isave( 2 ) = stdlib_izmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( isave( 2 ) ) ) ) .and.( isave( 3 )est ) then + call stdlib_zcopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_zlacn2 + + !> ZLACON: estimates the 1-norm of a square, complex matrix A. + !> Reverse communication is used for evaluating matrix-vector products. + + subroutine stdlib_zlacon( n, v, x, est, kase ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(inout) :: kase + integer(ilp), intent(in) :: n + real(dp), intent(inout) :: est + ! Array Arguments + complex(dp), intent(out) :: v(n) + complex(dp), intent(inout) :: x(n) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + ! Local Scalars + integer(ilp) :: i, iter, j, jlast, jump + real(dp) :: absxi, altsgn, estold, safmin, temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag + ! Save Statement + save + ! Executable Statements + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + if( kase==0 ) then + do i = 1, n + x( i ) = cmplx( one / real( n,KIND=dp),KIND=dp) + end do + kase = 1 + jump = 1 + return + end if + go to ( 20, 40, 70, 90, 120 )jump + ! ................ entry (jump = 1) + ! first iteration. x has been overwritten by a*x. + 20 continue + if( n==1 ) then + v( 1 ) = x( 1 ) + est = abs( v( 1 ) ) + ! ... quit + go to 130 + end if + est = stdlib_dzsum1( n, x, 1 ) + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=dp) / absxi,aimag( x( i ) ) / absxi,KIND=dp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 2 + return + ! ................ entry (jump = 2) + ! first iteration. x has been overwritten by ctrans(a)*x. + 40 continue + j = stdlib_izmax1( n, x, 1 ) + iter = 2 + ! main loop - iterations 2,3,...,itmax. + 50 continue + do i = 1, n + x( i ) = czero + end do + x( j ) = cone + kase = 1 + jump = 3 + return + ! ................ entry (jump = 3) + ! x has been overwritten by a*x. + 70 continue + call stdlib_zcopy( n, x, 1, v, 1 ) + estold = est + est = stdlib_dzsum1( n, v, 1 ) + ! test for cycling. + if( est<=estold )go to 100 + do i = 1, n + absxi = abs( x( i ) ) + if( absxi>safmin ) then + x( i ) = cmplx( real( x( i ),KIND=dp) / absxi,aimag( x( i ) ) / absxi,KIND=dp) + + else + x( i ) = cone + end if + end do + kase = 2 + jump = 4 + return + ! ................ entry (jump = 4) + ! x has been overwritten by ctrans(a)*x. + 90 continue + jlast = j + j = stdlib_izmax1( n, x, 1 ) + if( ( abs( x( jlast ) )/=abs( x( j ) ) ) .and.( iterest ) then + call stdlib_zcopy( n, x, 1, v, 1 ) + est = temp + end if + 130 continue + kase = 0 + return + end subroutine stdlib_zlacon + + !> ZLACP2: copies all or part of a real two-dimensional matrix A to a + !> complex matrix B. + + pure subroutine stdlib_zlacp2( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_zlacp2 + + !> ZLACPY: copies all or part of a two-dimensional matrix A to another + !> matrix B. + + pure subroutine stdlib_zlacpy( uplo, m, n, a, lda, b, ldb ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldb, m, n + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( j, m ) + b( i, j ) = a( i, j ) + end do + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + do j = 1, n + do i = j, m + b( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = 1, m + b( i, j ) = a( i, j ) + end do + end do + end if + return + end subroutine stdlib_zlacpy + + !> ZLACRM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by N and complex; B is N by N and real; + !> C is M by N and complex. + + pure subroutine stdlib_zlacrm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(dp), intent(in) :: b(ldb,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( a( i, j ),KIND=dp) + end do + end do + l = m*n + 1 + call stdlib_dgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( a( i, j ) ) + end do + end do + call stdlib_dgemm( 'N', 'N', m, n, n, one, rwork, m, b, ldb, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=dp),rwork( l+( j-1 )*m+i-1 ),KIND=dp) + + end do + end do + return + end subroutine stdlib_zlacrm + + !> ZLACRT: performs the operation + !> ( c s )( x ) ==> ( x ) + !> ( -s c )( y ) ( y ) + !> where c and s are complex and the vectors x and y are complex. + + pure subroutine stdlib_zlacrt( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + complex(dp), intent(in) :: c, s + ! Array Arguments + complex(dp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(dp) :: ctemp + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + ctemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - s*cx( ix ) + cx( ix ) = ctemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + ctemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - s*cx( i ) + cx( i ) = ctemp + end do + return + end subroutine stdlib_zlacrt + + !> ZLADIV: := X / Y, where X and Y are complex. The computation of X / Y + !> will not overflow on an intermediary step unless the results + !> overflows. + + pure complex(dp) function stdlib_zladiv( x, y ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: x, y + ! ===================================================================== + ! Local Scalars + real(dp) :: zi, zr + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + call stdlib_dladiv( real( x,KIND=dp), aimag( x ), real( y,KIND=dp), aimag( y ), zr,zi ) + + stdlib_zladiv = cmplx( zr, zi,KIND=dp) + return + end function stdlib_zladiv + + !> ZLAED8: merges the two sets of eigenvalues together into a single + !> sorted set. Then it tries to deflate the size of the problem. + !> There are two ways in which deflation can occur: when two or more + !> eigenvalues are close together or if there is a tiny element in the + !> Z vector. For each such occurrence the order of the related secular + !> equation problem is reduced by one. + + pure subroutine stdlib_zlaed8( k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda,q2, ldq2, w, & + indxp, indx, indxq, perm, givptr,givcol, givnum, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: cutpnt, ldq, ldq2, n, qsiz + integer(ilp), intent(out) :: givptr, info, k + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(out) :: givcol(2,*), indx(*), indxp(*), perm(*) + integer(ilp), intent(inout) :: indxq(*) + real(dp), intent(inout) :: d(*), z(*) + real(dp), intent(out) :: dlamda(*), givnum(2,*), w(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: q2(ldq2,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: mone = -1.0_dp + + ! Local Scalars + integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2 + real(dp) :: c, eps, s, t, tau, tol + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -2 + else if( qsizn ) then + info = -8 + else if( ldq2n )go to 90 + if( rho*abs( z( j ) )<=tol ) then + ! deflate due to small z component. + k2 = k2 - 1 + indxp( k2 ) = j + else + ! check if eigenvalues are close enough to allow deflation. + s = z( jlam ) + c = z( j ) + ! find sqrt(a**2+b**2) without overflow or + ! destructive underflow. + tau = stdlib_dlapy2( c, s ) + t = d( j ) - d( jlam ) + c = c / tau + s = -s / tau + if( abs( t*c*s )<=tol ) then + ! deflation is possible. + z( j ) = tau + z( jlam ) = zero + ! record the appropriate givens rotation + givptr = givptr + 1 + givcol( 1, givptr ) = indxq( indx( jlam ) ) + givcol( 2, givptr ) = indxq( indx( j ) ) + givnum( 1, givptr ) = c + givnum( 2, givptr ) = s + call stdlib_zdrot( qsiz, q( 1, indxq( indx( jlam ) ) ), 1,q( 1, indxq( indx( j ) & + ) ), 1, c, s ) + t = d( jlam )*c*c + d( j )*s*s + d( j ) = d( jlam )*s*s + d( j )*c*c + d( jlam ) = t + k2 = k2 - 1 + i = 1 + 80 continue + if( k2+i<=n ) then + if( d( jlam ) ZLAESY: computes the eigendecomposition of a 2-by-2 symmetric matrix + !> ( ( A, B );( B, C ) ) + !> provided the norm of the matrix of eigenvectors is larger than + !> some threshold value. + !> RT1 is the eigenvalue of larger absolute value, and RT2 of + !> smaller absolute value. If the eigenvectors are computed, then + !> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence + !> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] + !> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] + + pure subroutine stdlib_zlaesy( a, b, c, rt1, rt2, evscal, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: a, b, c + complex(dp), intent(out) :: cs1, evscal, rt1, rt2, sn1 + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1_dp + + + + + + ! Local Scalars + real(dp) :: babs, evnorm, tabs, z + complex(dp) :: s, t, tmp + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + ! special case: the matrix is actually diagonal. + ! to avoid divide by zero later, we treat this case separately. + if( abs( b )==zero ) then + rt1 = a + rt2 = c + if( abs( rt1 )zero )t = z*sqrt( ( t / z )**2+( b / z )**2 ) + ! compute the two eigenvalues. rt1 and rt2 are exchanged + ! if necessary so that rt1 will have the greater magnitude. + rt1 = s + t + rt2 = s - t + if( abs( rt1 )one ) then + t = tabs*sqrt( ( one / tabs )**2+( sn1 / tabs )**2 ) + else + t = sqrt( cone+sn1*sn1 ) + end if + evnorm = abs( t ) + if( evnorm>=thresh ) then + evscal = cone / t + cs1 = evscal + sn1 = sn1*evscal + else + evscal = zero + end if + end if + return + end subroutine stdlib_zlaesy + + !> ZLAEV2: computes the eigendecomposition of a 2-by-2 Hermitian matrix + !> [ A B ] + !> [ CONJG(B) C ]. + !> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + !> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + !> eigenvector for RT1, giving the decomposition + !> [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] + !> [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. + + pure subroutine stdlib_zlaev2( a, b, c, rt1, rt2, cs1, sn1 ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(out) :: cs1, rt1, rt2 + complex(dp), intent(in) :: a, b, c + complex(dp), intent(out) :: sn1 + ! ===================================================================== + + + ! Local Scalars + real(dp) :: t + complex(dp) :: w + ! Intrinsic Functions + intrinsic :: abs,real,conjg + ! Executable Statements + if( abs( b )==zero ) then + w = one + else + w = conjg( b ) / abs( b ) + end if + call stdlib_dlaev2( real( a,KIND=dp), abs( b ), real( c,KIND=dp), rt1, rt2, cs1, t ) + + sn1 = w*t + return + end subroutine stdlib_zlaev2 + + !> ZLAG2C: converts a COMPLEX*16 matrix, SA, to a COMPLEX matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> ZLAG2C checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_zlag2c( m, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, m, n + ! Array Arguments + complex(sp), intent(out) :: sa(ldsa,*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: rmax + ! Intrinsic Functions + intrinsic :: real,aimag + ! Executable Statements + rmax = stdlib_slamch( 'O' ) + do j = 1, n + do i = 1, m + if( ( real( a( i, j ),KIND=dp)<-rmax ) .or.( real( a( i, j ),KIND=dp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) then + info = 1 + go to 30 + end if + sa( i, j ) = a( i, j ) + end do + end do + info = 0 + 30 continue + return + end subroutine stdlib_zlag2c + + !> ZLAGTM: performs a matrix-vector product of the form + !> B := alpha * A * X + beta * B + !> where A is a tridiagonal matrix of order N, B and X are N by NRHS + !> matrices, and alpha and beta are real scalars, each of which may be + !> 0., 1., or -1. + + pure subroutine stdlib_zlagtm( trans, n, nrhs, alpha, dl, d, du, x, ldx, beta,b, ldb ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(in) :: alpha, beta + ! Array Arguments + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: d(*), dl(*), du(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n==0 )return + ! multiply b by beta if beta/=1. + if( beta==zero ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = zero + end do + end do + else if( beta==-one ) then + do j = 1, nrhs + do i = 1, n + b( i, j ) = -b( i, j ) + end do + end do + end if + if( alpha==one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b + a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + dl( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + dl( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b + a**t * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + d( 1 )*x( 1, j ) +dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) + du( n-1 )*x( n-1, j ) +d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + du( i-1 )*x( i-1, j ) +d( i )*x( i, j ) + dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b + a**h * x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) + conjg( d( 1 ) )*x( 1, j ) +conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) + conjg( du( n-1 ) )*x( n-1, j ) + conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) + conjg( du( i-1 ) )*x( i-1, j ) + conjg( d( i ) )& + *x( i, j ) + conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + else if( alpha==-one ) then + if( stdlib_lsame( trans, 'N' ) ) then + ! compute b := b - a*x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -du( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - dl( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - dl( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - du( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! compute b := b - a**t *x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - d( 1 )*x( 1, j ) -dl( 1 )*x( 2, j ) + b( n, j ) = b( n, j ) - du( n-1 )*x( n-1, j ) -d( n )*x( n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - du( i-1 )*x( i-1, j ) -d( i )*x( i, j ) - dl( i & + )*x( i+1, j ) + end do + end if + end do + else if( stdlib_lsame( trans, 'C' ) ) then + ! compute b := b - a**h *x + do j = 1, nrhs + if( n==1 ) then + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) + else + b( 1, j ) = b( 1, j ) - conjg( d( 1 ) )*x( 1, j ) -conjg( dl( 1 ) )*x( 2, & + j ) + b( n, j ) = b( n, j ) - conjg( du( n-1 ) )*x( n-1, j ) - conjg( d( n ) )*x(& + n, j ) + do i = 2, n - 1 + b( i, j ) = b( i, j ) - conjg( du( i-1 ) )*x( i-1, j ) - conjg( d( i ) )& + *x( i, j ) - conjg( dl( i ) )*x( i+1, j ) + end do + end if + end do + end if + end if + return + end subroutine stdlib_zlagtm + + !> ZLAHEF: computes a partial factorization of a complex Hermitian + !> matrix A using the Bunch-Kaufman diagonal pivoting method. The + !> partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlahef( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(dp) :: absakk, alpha, colmax, r1, rowmax, t + complex(dp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( a( k, k ),KIND=dp) + else + ! ============================================================ + ! begin pivot search + ! case(1) + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column kw-1 of w and update it + call stdlib_zcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + w( imax, kw-1 ) = real( a( imax, imax ),KIND=dp) + call stdlib_zcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_zlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + jmax = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + ! case(2) + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, kw-1 ),KIND=dp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_zcopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + ! case(4) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=dp) + call stdlib_zcopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + call stdlib_zlacgv( kk-1-kp, a( kp, kp+1 ), lda ) + if( kp>1 )call stdlib_zcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(4)) + r1 = one / real( a( k, k ),KIND=dp) + call stdlib_zdscal( k-1, r1, a( 1, k ), 1 ) + ! (2) conjugate column w(kw) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( conj(d21)*( d11 ) d21*( -1 ) ) + ! ( ( -1 ) ( d22 ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = t/d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0, since in 2x2 pivot case(4) + ! |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=dp)-one ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = conjg( d21 )*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_zlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + end do + ! update the rectangular superdiagonal block + call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_zswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + kstep = 1 + ! copy column k of a to column k of w and update it + w( k, k ) = real( a( k, k ),KIND=dp) + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! begin pivot search along imax row + ! copy column imax to column k+1 of w and update it + call stdlib_zcopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_zlacgv( imax-k, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( a( imax, imax ),KIND=dp) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + ! case(3) + else if( abs( real( w( imax, k+1 ),KIND=dp) )>=alpha*rowmax )then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_zcopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + ! case(4) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + ! end pivot search along imax row + end if + ! end pivot search + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=dp) + call stdlib_zcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_zlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_zswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=dp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_zcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_zswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_zlahef + + !> ZLAHEF_RK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLAHEF_RK is an auxiliary routine called by ZHETRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlahef_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*), e(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, k, kk, kkw, kp, kstep, kw, p + real(dp) :: absakk, alpha, colmax, dtemp, r1, rowmax, t, sfmin + complex(dp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_zcopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=dp) + if( k1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=dp) + if( k>1 )call stdlib_zcopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_zcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=dp) + call stdlib_zcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_zlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=dp) )1 )call stdlib_zcopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_zcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=dp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_zdscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=dp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_zlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 (note that conjg(w) is actually stored) + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=dp) + if( k1 ) then + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=dp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=dp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_izamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=dp) )1 )call stdlib_zswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_zswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=dp) + call stdlib_zcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_zlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_zswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=dp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_zcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_zdscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_zlacgv( n-k, w( k+1, k ), 1 ) + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k ZLAHEF_ROOK: computes a partial factorization of a complex Hermitian + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting + !> method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) + !> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**H denotes the conjugate transpose of U. + !> ZLAHEF_ROOK is an auxiliary routine called by ZHETRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlahef_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, jp1, jp2, k, kk, kkw, kp, kstep, kw, & + p + real(dp) :: absakk, alpha, colmax, dtemp, r1, rowmax, t, sfmin + complex(dp) :: d11, d21, d22, z + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 (note that conjg(w) is actually stored) + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 )call stdlib_zcopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 ) + w( k, kw ) = real( a( k, k ),KIND=dp) + if( k1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + a( k, k ) = real( w( k, kw ),KIND=dp) + if( k>1 )call stdlib_zcopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! begin pivot search + ! case(1) + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_zcopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),1 ) + + w( imax, kw-1 ) = real( a( imax, imax ),KIND=dp) + call stdlib_zcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + call stdlib_zlacgv( k-imax, w( imax+1, kw-1 ), 1 ) + if( k1 ) then + itemp = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,kw-1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,kw-1 ),KIND=dp) )1 )call stdlib_zcopy( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + ! interchange rows k and p in the last k+1 to n columns of a + ! (columns k and k-1 of a for 2-by-2 pivot will be + ! later overwritten). interchange rows k and p + ! in last kkw to nb columns of w. + if( k1 )call stdlib_zcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k1 ) then + ! (note: no need to check if a(k,k) is not zero, + ! since that was ensured earlier in pivot search: + ! case a(k,k) = 0 falls into 2x2 pivot case(3)) + ! handle division by a small number + t = real( a( k, k ),KIND=dp) + if( abs( t )>=sfmin ) then + r1 = one / t + call stdlib_zdscal( k-1, r1, a( 1, k ), 1 ) + else + do ii = 1, k-1 + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(kw) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold + ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2 + ! block d(k-1:k,k-1:k) in columns k-1 and k of a. + ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit + ! block and not stored) + ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw) + ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) = + ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) ) + if( k>2 ) then + ! factor out the columns of the inverse of 2-by-2 pivot + ! block d, so that each column contains 1, to reduce the + ! number of flops when we multiply panel + ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1). + ! d**(-1) = ( d11 cj(d21) )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = + ! ( (-d21) ( d11 ) ) + ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * + ! * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = + ! ( ( -1 ) ( d11/conj(d21) ) ) + ! = 1/(|d21|**2) * 1/(d22*d11-1) * + ! * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = ( (t/conj(d21))*( d11 ) (t/d21)*( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! handle division by a small number. (note: order of + ! operations is important) + ! = ( t*(( d11 )/conj(d21)) t*(( -1 )/d21 ) ) + ! ( (( -1 ) ) (( d22 ) ) ), + ! where d11 = d22/d21, + ! d22 = d11/conj(d21), + ! d21 = d21, + ! t = 1/(d22*d11-1). + ! (note: no need to check for division by zero, + ! since that was ensured earlier in pivot search: + ! (a) d21 != 0 in 2x2 pivot case(4), + ! since |d21| should be larger than |d11| and |d22|; + ! (b) (d22*d11 - 1) != 0, since from (a), + ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / conjg( d21 ) + d22 = w( k-1, kw-1 ) / d21 + t = one / ( real( d11*d22,KIND=dp)-one ) + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /d21 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /conjg( d21 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + ! (2) conjugate columns w(kw) and w(kw-1) + call stdlib_zlacgv( k-1, w( 1, kw ), 1 ) + call stdlib_zlacgv( k-2, w( 1, kw-1 ), 1 ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h + ! computing blocks of nb columns at a time (note that conjg(w) is + ! actually stored) + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + a( jj, jj ) = real( a( jj, jj ),KIND=dp) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in of rows in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows j and jp2 + ! (or j and jp2, and j+1 and jp1) at each step j + kstep = 1 + jp1 = 1 + ! (here, j is a diagonal index) + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + ! (here, j is a diagonal index) + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_zswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = jj + 1 + if( kstep==2 .and. jp1/=jj .and. j<=n )call stdlib_zswap( n-j+1, a( jp1, j ), & + lda, a( jj, j ), lda ) + if( j=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update column k of w + w( k, k ) = real( a( k, k ),KIND=dp) + if( k1 ) then + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, 1 ), & + ldw, cone, w( k, k ), 1 ) + w( k, k ) = real( w( k, k ),KIND=dp) + end if + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = abs( real( w( k, k ),KIND=dp) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), lda, w( & + imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + w( imax, k+1 ) = real( w( imax, k+1 ),KIND=dp) + end if + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_izamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! case(2) + ! equivalent to testing for + ! abs( real( w( imax,k+1 ),KIND=dp) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( abs( real( w( imax,k+1 ),KIND=dp) )1 )call stdlib_zswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + call stdlib_zswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw ) + end if + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = real( a( kk, kk ),KIND=dp) + call stdlib_zcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + call stdlib_zlacgv( kp-kk-1, a( kp, kk+1 ), lda ) + if( kp1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_zswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! (1) store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + ! (note: no need to use for hermitian matrix + ! a( k, k ) = real( w( k, k),KIND=dp) to separately copy diagonal + ! element d(k,k) from w (potentially saves only one load)) + call stdlib_zcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=sfmin ) then + r1 = one / t + call stdlib_zdscal( n-k, r1, a( k+1, k ), 1 ) + else + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / t + end do + end if + ! (2) conjugate column w(k) + call stdlib_zlacgv( n-k, w( k+1, k ), 1 ) + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2 + ! block d(k:k+1,k:k+1) in columns k and k+1 of a. + ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit + ! block and not stored. + ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1) + ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) = + ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) ) + if( k=1 )call stdlib_zswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = jj -1 + if( kstep==2 .and. jp1/=jj .and. j>=1 )call stdlib_zswap( j, a( jp1, 1 ), lda, a(& + jj, 1 ), lda ) + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_zlahef_rook + + !> ZLAIC1: applies one step of incremental condition estimation in + !> its simplest version: + !> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j + !> lower triangular matrix L, such that + !> twonorm(L*x) = sest + !> Then ZLAIC1 computes sestpr, s, c such that + !> the vector + !> [ s*x ] + !> xhat = [ c ] + !> is an approximate singular vector of + !> [ L 0 ] + !> Lhat = [ w**H gamma ] + !> in the sense that + !> twonorm(Lhat*xhat) = sestpr. + !> Depending on JOB, an estimate for the largest or smallest singular + !> value is computed. + !> Note that [s c]**H and sestpr**2 is an eigenpair of the system + !> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] + !> [ conjg(gamma) ] + !> where alpha = x**H * w. + + pure subroutine stdlib_zlaic1( job, j, x, sest, w, gamma, sestpr, s, c ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: j, job + real(dp), intent(in) :: sest + real(dp), intent(out) :: sestpr + complex(dp), intent(out) :: c, s + complex(dp), intent(in) :: gamma + ! Array Arguments + complex(dp), intent(in) :: w(j), x(j) + ! ===================================================================== + + + ! Local Scalars + real(dp) :: absalp, absest, absgam, b, eps, norma, s1, s2, scl, t, test, tmp, zeta1, & + zeta2 + complex(dp) :: alpha, cosine, sine + ! Intrinsic Functions + intrinsic :: abs,conjg,max,sqrt + ! Executable Statements + eps = stdlib_dlamch( 'EPSILON' ) + alpha = stdlib_zdotc( j, x, 1, w, 1 ) + absalp = abs( alpha ) + absgam = abs( gamma ) + absest = abs( sest ) + if( job==1 ) then + ! estimating largest singular value + ! special cases + if( sest==zero ) then + s1 = max( absgam, absalp ) + if( s1==zero ) then + s = zero + c = one + sestpr = zero + else + s = alpha / s1 + c = gamma / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=dp) + s = s / tmp + c = c / tmp + sestpr = s1*tmp + end if + return + else if( absgam<=eps*absest ) then + s = one + c = zero + tmp = max( absest, absalp ) + s1 = absest / tmp + s2 = absalp / tmp + sestpr = tmp*sqrt( s1*s1+s2*s2 ) + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = one + c = zero + sestpr = s2 + else + s = zero + c = one + sestpr = s1 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = s2*scl + s = ( alpha / s2 ) / scl + c = ( gamma / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = s1*scl + s = ( alpha / s1 ) / scl + c = ( gamma / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + b = ( one-zeta1*zeta1-zeta2*zeta2 )*half + c = zeta1*zeta1 + if( b>zero ) then + t = real( c / ( b+sqrt( b*b+c ) ),KIND=dp) + else + t = real( sqrt( b*b+c ) - b,KIND=dp) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=dp) + + s = sine / tmp + c = cosine / tmp + sestpr = sqrt( t+one )*absest + return + end if + else if( job==2 ) then + ! estimating smallest singular value + ! special cases + if( sest==zero ) then + sestpr = zero + if( max( absgam, absalp )==zero ) then + sine = one + cosine = zero + else + sine = -conjg( gamma ) + cosine = conjg( alpha ) + end if + s1 = max( abs( sine ), abs( cosine ) ) + s = sine / s1 + c = cosine / s1 + tmp = real( sqrt( s*conjg( s )+c*conjg( c ) ),KIND=dp) + s = s / tmp + c = c / tmp + return + else if( absgam<=eps*absest ) then + s = zero + c = one + sestpr = absgam + return + else if( absalp<=eps*absest ) then + s1 = absgam + s2 = absest + if( s1<=s2 ) then + s = zero + c = one + sestpr = s1 + else + s = one + c = zero + sestpr = s2 + end if + return + else if( absest<=eps*absalp .or. absest<=eps*absgam ) then + s1 = absgam + s2 = absalp + if( s1<=s2 ) then + tmp = s1 / s2 + scl = sqrt( one+tmp*tmp ) + sestpr = absest*( tmp / scl ) + s = -( conjg( gamma ) / s2 ) / scl + c = ( conjg( alpha ) / s2 ) / scl + else + tmp = s2 / s1 + scl = sqrt( one+tmp*tmp ) + sestpr = absest / scl + s = -( conjg( gamma ) / s1 ) / scl + c = ( conjg( alpha ) / s1 ) / scl + end if + return + else + ! normal case + zeta1 = absalp / absest + zeta2 = absgam / absest + norma = max( one+zeta1*zeta1+zeta1*zeta2,zeta1*zeta2+zeta2*zeta2 ) + ! see if root is closer to zero or to one + test = one + two*( zeta1-zeta2 )*( zeta1+zeta2 ) + if( test>=zero ) then + ! root is close to zero, compute directly + b = ( zeta1*zeta1+zeta2*zeta2+one )*half + c = zeta2*zeta2 + t = real( c / ( b+sqrt( abs( b*b-c ) ) ),KIND=dp) + sine = ( alpha / absest ) / ( one-t ) + cosine = -( gamma / absest ) / t + sestpr = sqrt( t+four*eps*eps*norma )*absest + else + ! root is closer to one, shift by that amount + b = ( zeta2*zeta2+zeta1*zeta1-one )*half + c = zeta1*zeta1 + if( b>=zero ) then + t = -c / ( b+sqrt( b*b+c ) ) + else + t = b - sqrt( b*b+c ) + end if + sine = -( alpha / absest ) / t + cosine = -( gamma / absest ) / ( one+t ) + sestpr = sqrt( one+t+four*eps*eps*norma )*absest + end if + tmp = real( sqrt( sine * conjg( sine )+ cosine * conjg( cosine ) ),KIND=dp) + + s = sine / tmp + c = cosine / tmp + return + end if + end if + return + end subroutine stdlib_zlaic1 + + !> ZLAPMR: rearranges the rows of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. + !> If FORWRD = .TRUE., forward permutation: + !> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. + !> If FORWRD = .FALSE., backward permutation: + !> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. + + pure subroutine stdlib_zlapmr( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, in, j, jj + complex(dp) :: temp + ! Executable Statements + if( m<=1 )return + do i = 1, m + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, m + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do jj = 1, n + temp = x( j, jj ) + x( j, jj ) = x( in, jj ) + x( in, jj ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, m + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do jj = 1, n + temp = x( i, jj ) + x( i, jj ) = x( j, jj ) + x( j, jj ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_zlapmr + + !> ZLAPMT: rearranges the columns of the M by N matrix X as specified + !> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. + !> If FORWRD = .TRUE., forward permutation: + !> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. + !> If FORWRD = .FALSE., backward permutation: + !> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. + + pure subroutine stdlib_zlapmt( forwrd, m, n, x, ldx, k ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: forwrd + integer(ilp), intent(in) :: ldx, m, n + ! Array Arguments + integer(ilp), intent(inout) :: k(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ii, in, j + complex(dp) :: temp + ! Executable Statements + if( n<=1 )return + do i = 1, n + k( i ) = -k( i ) + end do + if( forwrd ) then + ! forward permutation + do i = 1, n + if( k( i )>0 )go to 40 + j = i + k( j ) = -k( j ) + in = k( j ) + 20 continue + if( k( in )>0 )go to 40 + do ii = 1, m + temp = x( ii, j ) + x( ii, j ) = x( ii, in ) + x( ii, in ) = temp + end do + k( in ) = -k( in ) + j = in + in = k( in ) + go to 20 + 40 continue + end do + else + ! backward permutation + do i = 1, n + if( k( i )>0 )go to 80 + k( i ) = -k( i ) + j = k( i ) + 60 continue + if( j==i )go to 80 + do ii = 1, m + temp = x( ii, i ) + x( ii, i ) = x( ii, j ) + x( ii, j ) = temp + end do + k( j ) = -k( j ) + j = k( j ) + go to 60 + 80 continue + end do + end if + return + end subroutine stdlib_zlapmt + + !> ZLAQGB: equilibrates a general M by N band matrix A with KL + !> subdiagonals and KU superdiagonals using the row and scaling factors + !> in the vectors R and C. + + pure subroutine stdlib_zlaqgb( m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd,amax, equed ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: kl, ku, ldab, m, n + real(dp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: c(*), r(*) + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*ab( ku+1+i-j, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = max( 1, j-ku ), min( m, j+kl ) + ab( ku+1+i-j, j ) = cj*r( i )*ab( ku+1+i-j, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_zlaqgb + + !> ZLAQGE: equilibrates a general M by N matrix A using the row and + !> column scaling factors in the vectors R and C. + + pure subroutine stdlib_zlaqge( m, n, a, lda, r, c, rowcnd, colcnd, amax,equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + integer(ilp), intent(in) :: lda, m, n + real(dp), intent(in) :: amax, colcnd, rowcnd + ! Array Arguments + real(dp), intent(in) :: c(*), r(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( rowcnd>=thresh .and. amax>=small .and. amax<=large )then + ! no row scaling + if( colcnd>=thresh ) then + ! no column scaling + equed = 'N' + else + ! column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*a( i, j ) + end do + end do + equed = 'C' + end if + else if( colcnd>=thresh ) then + ! row scaling, no column scaling + do j = 1, n + do i = 1, m + a( i, j ) = r( i )*a( i, j ) + end do + end do + equed = 'R' + else + ! row and column scaling + do j = 1, n + cj = c( j ) + do i = 1, m + a( i, j ) = cj*r( i )*a( i, j ) + end do + end do + equed = 'B' + end if + return + end subroutine stdlib_zlaqge + + !> ZLAQHB: equilibrates a Hermitian band matrix A + !> using the scaling factors in the vector S. + + pure subroutine stdlib_zlaqhb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(out) :: s(*) + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j - 1 + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + ab( kd+1, j ) = cj*cj*real( ab( kd+1, j ),KIND=dp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + ab( 1, j ) = cj*cj*real( ab( 1, j ),KIND=dp) + do i = j + 1, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqhb + + !> ZLAQHE: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_zlaqhe( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + a( i, j ) = cj*s( i )*a( i, j ) + end do + a( j, j ) = cj*cj*real( a( j, j ),KIND=dp) + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + a( j, j ) = cj*cj*real( a( j, j ),KIND=dp) + do i = j + 1, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqhe + + !> ZLAQHP: equilibrates a Hermitian matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_zlaqhp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j - 1 + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + ap( jc+j-1 ) = cj*cj*real( ap( jc+j-1 ),KIND=dp) + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + ap( jc ) = cj*cj*real( ap( jc ),KIND=dp) + do i = j + 1, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqhp + + !> Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a + !> scalar multiple of the first column of the product + !> (*) K = (H - s1*I)*(H - s2*I) + !> scaling to avoid overflows and most underflows. + !> This is useful for starting double implicit shift bulges + !> in the QR algorithm. + + pure subroutine stdlib_zlaqr1( n, h, ldh, s1, s2, v ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + complex(dp), intent(in) :: s1, s2 + integer(ilp), intent(in) :: ldh, n + ! Array Arguments + complex(dp), intent(in) :: h(ldh,*) + complex(dp), intent(out) :: v(*) + ! ================================================================ + ! Parameters + real(dp), parameter :: rzero = 0.0_dp + + + ! Local Scalars + complex(dp) :: cdum, h21s, h31s + real(dp) :: s + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! quick return if possible + if( n/=2 .and. n/=3 ) then + return + end if + if( n==2 ) then + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) + if( s==rzero ) then + v( 1 ) = czero + v( 2 ) = czero + else + h21s = h( 2, 1 ) / s + v( 1 ) = h21s*h( 1, 2 ) + ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + end if + else + s = cabs1( h( 1, 1 )-s2 ) + cabs1( h( 2, 1 ) ) +cabs1( h( 3, 1 ) ) + if( s==czero ) then + v( 1 ) = czero + v( 2 ) = czero + v( 3 ) = czero + else + h21s = h( 2, 1 ) / s + h31s = h( 3, 1 ) / s + v( 1 ) = ( h( 1, 1 )-s1 )*( ( h( 1, 1 )-s2 ) / s ) +h( 1, 2 )*h21s + h( 1, 3 )& + *h31s + v( 2 ) = h21s*( h( 1, 1 )+h( 2, 2 )-s1-s2 ) + h( 2, 3 )*h31s + v( 3 ) = h31s*( h( 1, 1 )+h( 3, 3 )-s1-s2 ) + h21s*h( 3, 2 ) + end if + end if + end subroutine stdlib_zlaqr1 + + !> ZLAQSB: equilibrates a symmetric band matrix A using the scaling + !> factors in the vector S. + + pure subroutine stdlib_zlaqsb( uplo, n, kd, ab, ldab, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored in band format. + do j = 1, n + cj = s( j ) + do i = max( 1, j-kd ), j + ab( kd+1+i-j, j ) = cj*s( i )*ab( kd+1+i-j, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, min( n, j+kd ) + ab( 1+i-j, j ) = cj*s( i )*ab( 1+i-j, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqsb + + !> ZLAQSP: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_zlaqsp( uplo, n, ap, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j, jc + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = 1, j + ap( jc+i-1 ) = cj*s( i )*ap( jc+i-1 ) + end do + jc = jc + j + end do + else + ! lower triangle of a is stored. + jc = 1 + do j = 1, n + cj = s( j ) + do i = j, n + ap( jc+i-j ) = cj*s( i )*ap( jc+i-j ) + end do + jc = jc + n - j + 1 + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqsp + + !> ZLAQSY: equilibrates a symmetric matrix A using the scaling factors + !> in the vector S. + + pure subroutine stdlib_zlaqsy( uplo, n, a, lda, s, scond, amax, equed ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(out) :: equed + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: amax, scond + ! Array Arguments + real(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: thresh = 0.1e+0_dp + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: cj, large, small + ! Executable Statements + ! quick return if possible + if( n<=0 ) then + equed = 'N' + return + end if + ! initialize large and small. + small = stdlib_dlamch( 'SAFE MINIMUM' ) / stdlib_dlamch( 'PRECISION' ) + large = one / small + if( scond>=thresh .and. amax>=small .and. amax<=large ) then + ! no equilibration + equed = 'N' + else + ! replace a by diag(s) * a * diag(s). + if( stdlib_lsame( uplo, 'U' ) ) then + ! upper triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = 1, j + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + else + ! lower triangle of a is stored. + do j = 1, n + cj = s( j ) + do i = j, n + a( i, j ) = cj*s( i )*a( i, j ) + end do + end do + end if + equed = 'Y' + end if + return + end subroutine stdlib_zlaqsy + + !> ZLAR1V: computes the (scaled) r-th column of the inverse of + !> the sumbmatrix in rows B1 through BN of the tridiagonal matrix + !> L D L**T - sigma I. When sigma is close to an eigenvalue, the + !> computed vector is an accurate eigenvector. Usually, r corresponds + !> to the index where the eigenvector is largest in magnitude. + !> The following steps accomplish this computation : + !> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, + !> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, + !> (c) Computation of the diagonal elements of the inverse of + !> L D L**T - sigma I by combining the above transforms, and choosing + !> r as the index where the diagonal of the inverse is (one of the) + !> largest in magnitude. + !> (d) Computation of the (scaled) r-th column of the inverse using the + !> twisted factorization obtained by combining the top part of the + !> the stationary and the bottom part of the progressive transform. + + pure subroutine stdlib_zlar1v( n, b1, bn, lambda, d, l, ld, lld,pivmin, gaptol, z, wantnc, & + negcnt, ztz, mingma,r, isuppz, nrminv, resid, rqcorr, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantnc + integer(ilp), intent(in) :: b1, bn, n + integer(ilp), intent(out) :: negcnt + integer(ilp), intent(inout) :: r + real(dp), intent(in) :: gaptol, lambda, pivmin + real(dp), intent(out) :: mingma, nrminv, resid, rqcorr, ztz + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*) + real(dp), intent(in) :: d(*), l(*), ld(*), lld(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: sawnan1, sawnan2 + integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2 + real(dp) :: dminus, dplus, eps, s, tmp + ! Intrinsic Functions + intrinsic :: abs,real + ! Executable Statements + eps = stdlib_dlamch( 'PRECISION' ) + if( r==0 ) then + r1 = b1 + r2 = bn + else + r1 = r + r2 = r + end if + ! storage for lplus + indlpl = 0 + ! storage for uminus + indumn = n + inds = 2*n + 1 + indp = 3*n + 1 + if( b1==1 ) then + work( inds ) = zero + else + work( inds+b1-1 ) = lld( b1-1 ) + end if + ! compute the stationary transform (using the differential form) + ! until the index r2. + sawnan1 = .false. + neg1 = 0 + s = work( inds+b1-1 ) - lambda + do i = b1, r1 - 1 + dplus = d( i ) + s + work( indlpl+i ) = ld( i ) / dplus + if(dplus ZLAR2V: applies a vector of complex plane rotations with real cosines + !> from both sides to a sequence of 2-by-2 complex Hermitian matrices, + !> defined by the elements of the vectors x, y and z. For i = 1,2,...,n + !> ( x(i) z(i) ) := + !> ( conjg(z(i)) y(i) ) + !> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) + !> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) + + pure subroutine stdlib_zlar2v( n, x, y, z, incx, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, n + ! Array Arguments + real(dp), intent(in) :: c(*) + complex(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: x(*), y(*), z(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix + real(dp) :: ci, sii, sir, t1i, t1r, t5, t6, xi, yi, zii, zir + complex(dp) :: si, t2, t3, t4, zi + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag + ! Executable Statements + ix = 1 + ic = 1 + do i = 1, n + xi = real( x( ix ),KIND=dp) + yi = real( y( ix ),KIND=dp) + zi = z( ix ) + zir = real( zi,KIND=dp) + zii = aimag( zi ) + ci = c( ic ) + si = s( ic ) + sir = real( si,KIND=dp) + sii = aimag( si ) + t1r = sir*zir - sii*zii + t1i = sir*zii + sii*zir + t2 = ci*zi + t3 = t2 - conjg( si )*xi + t4 = conjg( t2 ) + si*yi + t5 = ci*xi + t1r + t6 = ci*yi - t1r + x( ix ) = ci*t5 + ( sir*real( t4,KIND=dp)+sii*aimag( t4 ) ) + y( ix ) = ci*t6 - ( sir*real( t3,KIND=dp)-sii*aimag( t3 ) ) + z( ix ) = ci*t3 + conjg( si )*cmplx( t6, t1i,KIND=dp) + ix = ix + incx + ic = ic + incc + end do + return + end subroutine stdlib_zlar2v + + !> ZLARCM: performs a very simple matrix-matrix multiplication: + !> C := A * B, + !> where A is M by M and real; B is M by N and complex; + !> C is M by N and complex. + + pure subroutine stdlib_zlarcm( m, n, a, lda, b, ldb, c, ldc, rwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldb, ldc, m, n + ! Array Arguments + real(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: b(ldb,*) + complex(dp), intent(out) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! quick return if possible. + if( ( m==0 ) .or. ( n==0 ) )return + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = real( b( i, j ),KIND=dp) + end do + end do + l = m*n + 1 + call stdlib_dgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = rwork( l+( j-1 )*m+i-1 ) + end do + end do + do j = 1, n + do i = 1, m + rwork( ( j-1 )*m+i ) = aimag( b( i, j ) ) + end do + end do + call stdlib_dgemm( 'N', 'N', m, n, m, one, a, lda, rwork, m, zero,rwork( l ), m ) + + do j = 1, n + do i = 1, m + c( i, j ) = cmplx( real( c( i, j ),KIND=dp),rwork( l+( j-1 )*m+i-1 ),KIND=dp) + + end do + end do + return + end subroutine stdlib_zlarcm + + !> ZLARF: applies a complex elementary reflector H to a complex M-by-N + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H, supply conjg(tau) instead + !> tau. + + pure subroutine stdlib_zlarf( side, m, n, v, incv, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, ldc, m, n + complex(dp), intent(in) :: tau + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: v(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: applyleft + integer(ilp) :: i, lastv, lastc + ! Executable Statements + applyleft = stdlib_lsame( side, 'L' ) + lastv = 0 + lastc = 0 + if( tau/=czero ) then + ! set up variables for scanning v. lastv begins pointing to the end + ! of v. + if( applyleft ) then + lastv = m + else + lastv = n + end if + if( incv>0 ) then + i = 1 + (lastv-1) * incv + else + i = 1 + end if + ! look for the last non-czero row in v. + do while( lastv>0 .and. v( i )==czero ) + lastv = lastv - 1 + i = i - incv + end do + if( applyleft ) then + ! scan for the last non-czero column in c(1:lastv,:). + lastc = stdlib_ilazlc(lastv, n, c, ldc) + else + ! scan for the last non-czero row in c(:,1:lastv). + lastc = stdlib_ilazlr(m, lastv, c, ldc) + end if + end if + ! note that lastc.eq.0_dp renders the blas operations null; no special + ! case is needed at this level. + if( applyleft ) then + ! form h * c + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastv,1:lastc)**h * v(1:lastv,1) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', lastv, lastc, cone,c, ldc, v, incv, & + czero, work, 1 ) + ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**h + call stdlib_zgerc( lastv, lastc, -tau, v, incv, work, 1, c, ldc ) + end if + else + ! form c * h + if( lastv>0 ) then + ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1) + call stdlib_zgemv( 'NO TRANSPOSE', lastc, lastv, cone, c, ldc,v, incv, czero, & + work, 1 ) + ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**h + call stdlib_zgerc( lastc, lastv, -tau, work, 1, v, incv, c, ldc ) + end if + end if + return + end subroutine stdlib_zlarf + + !> ZLARFB: applies a complex block reflector H or its transpose H**H to a + !> complex M-by-N matrix C, from either the left or the right. + + pure subroutine stdlib_zlarfb( side, trans, direct, storev, m, n, k, v, ldv,t, ldt, c, ldc, & + work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: t(ldt,*), v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( storev, 'C' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 ) (first k rows) + ! ( v2 ) + ! where v1 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_zcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_zlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h * v2 + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c( k+1, 1 ), ldc,v( k+1, 1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ztrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c2 := c2 - v2 * w**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v( k+1, 1 ), ldv, work,ldwork, cone, c( k+1, 1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_zcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1 + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c( 1, k+& + 1 ), ldc, v( k+1, 1 ), ldv,cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c2 := c2 - w * v2**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v( k+1, 1 ),ldv, cone, c( 1, k+1 ), ldc ) + end if + ! w := w * v1**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 ) + ! ( v2 ) (last k rows) + ! where v2 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v = (c1**h * v1 + c2**h * v2) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_zcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_zlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( m-& + k+1, 1 ), ldv, work, ldwork ) + if( m>k ) then + ! w := w + c1**h * v1 + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', n,k, m-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ztrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v * w**h + if( m>k ) then + ! c1 := c1 - v1 * w**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',m-k, n, k, -cone, & + v, ldv, work, ldwork,cone, c, ldc ) + end if + ! w := w * v2**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( m-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v = (c1*v1 + c2*v2) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_zcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2 + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( n-& + k+1, 1 ), ldv, work, ldwork ) + if( n>k ) then + ! w := w + c1 * v1 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, k, n-k,cone, c, ldc, & + v, ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v**h + if( n>k ) then + ! c1 := c1 - w * v1**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,n-k, k, -cone, & + work, ldwork, v, ldv, cone,c, ldc ) + end if + ! w := w * v2**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( n-k+1, 1 ), ldv, work,ldwork ) + ! c2 := c2 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + else if( stdlib_lsame( storev, 'R' ) ) then + if( stdlib_lsame( direct, 'F' ) ) then + ! let v = ( v1 v2 ) (v1: first k columns) + ! where v1 is unit upper triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c1**h + do j = 1, k + call stdlib_zcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_zlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v, ldv, work, ldwork ) + if( m>k ) then + ! w := w + c2**h * v2**h + call stdlib_zgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone,c( k+1, 1 ), ldc, v( 1, k+1 ), ldv, cone,work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ztrmm( 'RIGHT', 'UPPER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c2 := c2 - v2**h * w**h + call stdlib_zgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone,v( 1, k+1 ), ldv, work, ldwork, cone,c( k+1, 1 ), ldc ) + end if + ! w := w * v1 + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w**h + do j = 1, k + do i = 1, n + c( j, i ) = c( j, i ) - conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c1 + do j = 1, k + call stdlib_zcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v1**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v, ldv, work, ldwork ) + if( n>k ) then + ! w := w + c2 * v2**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c( 1, k+1 ), ldc,v( 1, k+1 ), ldv, cone, work, ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ztrmm( 'RIGHT', 'UPPER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c2 := c2 - w * v2 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v( 1, k+1 ), ldv, cone,c( 1, k+1 ), ldc ) + end if + ! w := w * v1 + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v, & + ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + end if + else + ! let v = ( v1 v2 ) (v2: last k columns) + ! where v2 is unit lower triangular. + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c where c = ( c1 ) + ! ( c2 ) + ! w := c**h * v**h = (c1**h * v1**h + c2**h * v2**h) (stored in work) + ! w := c2**h + do j = 1, k + call stdlib_zcopy( n, c( m-k+j, 1 ), ldc, work( 1, j ), 1 ) + call stdlib_zlacgv( n, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', n, k, cone,& + v( 1, m-k+1 ), ldv, work,ldwork ) + if( m>k ) then + ! w := w + c1**h * v1**h + call stdlib_zgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', n, k, m-k, & + cone, c,ldc, v, ldv, cone, work, ldwork ) + end if + ! w := w * t**h or w * t + call stdlib_ztrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k,cone, t, ldt, & + work, ldwork ) + ! c := c - v**h * w**h + if( m>k ) then + ! c1 := c1 - v1**h * w**h + call stdlib_zgemm( 'CONJUGATE TRANSPOSE','CONJUGATE TRANSPOSE', m-k, n, k, & + -cone, v,ldv, work, ldwork, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', n,k, cone, v( 1, & + m-k+1 ), ldv, work, ldwork ) + ! c2 := c2 - w**h + do j = 1, k + do i = 1, n + c( m-k+j, i ) = c( m-k+j, i ) -conjg( work( i, j ) ) + end do + end do + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h where c = ( c1 c2 ) + ! w := c * v**h = (c1*v1**h + c2*v2**h) (stored in work) + ! w := c2 + do j = 1, k + call stdlib_zcopy( m, c( 1, n-k+j ), 1, work( 1, j ), 1 ) + end do + ! w := w * v2**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', m, k, cone,& + v( 1, n-k+1 ), ldv, work,ldwork ) + if( n>k ) then + ! w := w + c1 * v1**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m,k, n-k, cone, & + c, ldc, v, ldv, cone, work,ldwork ) + end if + ! w := w * t or w * t**h + call stdlib_ztrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k,cone, t, ldt, & + work, ldwork ) + ! c := c - w * v + if( n>k ) then + ! c1 := c1 - w * v1 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, n-k, k,-cone, work, & + ldwork, v, ldv, cone, c, ldc ) + end if + ! w := w * v2 + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'UNIT', m,k, cone, v( 1, & + n-k+1 ), ldv, work, ldwork ) + ! c1 := c1 - w + do j = 1, k + do i = 1, m + c( i, n-k+j ) = c( i, n-k+j ) - work( i, j ) + end do + end do + end if + end if + end if + return + end subroutine stdlib_zlarfb + + !> ZLARFB_GETT: applies a complex Householder block reflector H from the + !> left to a complex (K+M)-by-N "triangular-pentagonal" matrix + !> composed of two block matrices: an upper trapezoidal K-by-N matrix A + !> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored + !> in the array B. The block reflector H is stored in a compact + !> WY-representation, where the elementary reflectors are in the + !> arrays A, B and T. See Further Details section. + + pure subroutine stdlib_zlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: ident + integer(ilp), intent(in) :: k, lda, ldb, ldt, ldwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnotident + integer(ilp) :: i, j + ! Executable Statements + ! quick return if possible + if( m<0 .or. n<=0 .or. k==0 .or. k>n )return + lnotident = .not.stdlib_lsame( ident, 'I' ) + ! ------------------------------------------------------------------ + ! first step. computation of the column block 2: + ! ( a2 ) := h * ( a2 ) + ! ( b2 ) ( b2 ) + ! ------------------------------------------------------------------ + if( n>k ) then + ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n) + ! into w2=work(1:k, 1:n-k) column-by-column. + do j = 1, n-k + call stdlib_zcopy( k, a( 1, k+j ), 1, work( 1, j ), 1 ) + end do + if( lnotident ) then + ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2, + ! v1 is not an identy matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_ztrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2 + ! v2 stored in b1. + if( m>0 ) then + call stdlib_zgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1, k+1 ), ldb, cone, & + work, ldwork ) + end if + ! col2_(4) compute w2: = t * w2, + ! t is upper-triangular. + call stdlib_ztrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork ) + ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2, + ! v2 stored in b1. + if( m>0 ) then + call stdlib_zgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1, & + k+1 ), ldb ) + end if + if( lnotident ) then + ! col2_(6) compute w2: = v1 * w2 = a1 * w2, + ! v1 is not an identity matrix, but unit lower-triangular, + ! v1 stored in a1 (diagonal ones are not stored). + call stdlib_ztrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork ) + + end if + ! col2_(7) compute a2: = a2 - w2 = + ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k), + ! column-by-column. + do j = 1, n-k + do i = 1, k + a( i, k+j ) = a( i, k+j ) - work( i, j ) + end do + end do + end if + ! ------------------------------------------------------------------ + ! second step. computation of the column block 1: + ! ( a1 ) := h * ( a1 ) + ! ( b1 ) ( 0 ) + ! ------------------------------------------------------------------ + ! col1_(1) compute w1: = a1. copy the upper-triangular + ! a1 = a(1:k, 1:k) into the upper-triangular + ! w1 = work(1:k, 1:k) column-by-column. + do j = 1, k + call stdlib_zcopy( j, a( 1, j ), 1, work( 1, j ), 1 ) + end do + ! set the subdiagonal elements of w1 to zero column-by-column. + do j = 1, k - 1 + do i = j + 1, k + work( i, j ) = czero + end do + end do + if( lnotident ) then + ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_ztrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork ) + end if + ! col1_(3) compute w1: = t * w1, + ! t is upper-triangular, + ! w1 is upper-triangular with zeroes below the diagonal. + call stdlib_ztrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork ) + ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1, + ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal. + if( m>0 ) then + call stdlib_ztrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb ) + end if + if( lnotident ) then + ! col1_(5) compute w1: = v1 * w1 = a1 * w1, + ! v1 is not an identity matrix, but unit lower-triangular + ! v1 stored in a1 (diagonal ones are not stored), + ! w1 is upper-triangular on input with zeroes below the diagonal, + ! and square on output. + call stdlib_ztrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork ) + ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k) + ! column-by-column. a1 is upper-triangular on input. + ! if ident, a1 is square on output, and w1 is square, + ! if not ident, a1 is upper-triangular on output, + ! w1 is upper-triangular. + ! col1_(6)_a compute elements of a1 below the diagonal. + do j = 1, k - 1 + do i = j + 1, k + a( i, j ) = - work( i, j ) + end do + end do + end if + ! col1_(6)_b compute elements of a1 on and above the diagonal. + do j = 1, k + do i = 1, j + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + return + end subroutine stdlib_zlarfb_gett + + !> ZLARFG: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, with beta real, and x is an + !> (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + !> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + pure subroutine stdlib_zlarfg( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(dp), intent(inout) :: alpha + complex(dp), intent(out) :: tau + ! Array Arguments + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(dp) :: alphi, alphr, beta, rsafmn, safmin, xnorm + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_dznrm2( n-1, x, incx ) + alphr = real( alpha,KIND=dp) + alphi = aimag( alpha ) + if( xnorm==zero .and. alphi==zero ) then + ! h = i + tau = zero + else + ! general case + beta = -sign( stdlib_dlapy3( alphr, alphi, xnorm ), alphr ) + safmin = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'E' ) + rsafmn = one / safmin + knt = 0 + if( abs( beta ) ZLARFGP: generates a complex elementary reflector H of order n, such + !> that + !> H**H * ( alpha ) = ( beta ), H**H * H = I. + !> ( x ) ( 0 ) + !> where alpha and beta are scalars, beta is real and non-negative, and + !> x is an (n-1)-element complex vector. H is represented in the form + !> H = I - tau * ( 1 ) * ( 1 v**H ) , + !> ( v ) + !> where tau is a complex scalar and v is a complex (n-1)-element + !> vector. Note that H is not hermitian. + !> If the elements of x are all zero and alpha is real, then tau = 0 + !> and H is taken to be the unit matrix. + + subroutine stdlib_zlarfgp( n, alpha, x, incx, tau ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + complex(dp), intent(inout) :: alpha + complex(dp), intent(out) :: tau + ! Array Arguments + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, knt + real(dp) :: alphi, alphr, beta, bignum, smlnum, xnorm + complex(dp) :: savealpha + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign + ! Executable Statements + if( n<=0 ) then + tau = zero + return + end if + xnorm = stdlib_dznrm2( n-1, x, incx ) + alphr = real( alpha,KIND=dp) + alphi = aimag( alpha ) + if( xnorm==zero ) then + ! h = [1-alpha/abs(alpha) 0; 0 i], sign chosen so alpha >= 0. + if( alphi==zero ) then + if( alphr>=zero ) then + ! when tau.eq.zero, the vector is special-cased to be + ! all zeros in the application routines. we do not need + ! to clear it. + tau = zero + else + ! however, the application routines rely on explicit + ! zero checks when tau.ne.zero, and we must clear x. + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = -alpha + end if + else + ! only "reflecting" the diagonal entry to be real and non-negative. + xnorm = stdlib_dlapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=dp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + alpha = xnorm + end if + else + ! general case + beta = sign( stdlib_dlapy3( alphr, alphi, xnorm ), alphr ) + smlnum = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'E' ) + bignum = one / smlnum + knt = 0 + if( abs( beta )=zero ) then + tau = zero + else + tau = two + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = real( -savealpha,KIND=dp) + end if + else + xnorm = stdlib_dlapy2( alphr, alphi ) + tau = cmplx( one - alphr / xnorm, -alphi / xnorm,KIND=dp) + do j = 1, n-1 + x( 1 + (j-1)*incx ) = zero + end do + beta = xnorm + end if + else + ! this is the general case. + call stdlib_zscal( n-1, alpha, x, incx ) + end if + ! if beta is subnormal, it may lose relative accuracy + do j = 1, knt + beta = beta*smlnum + end do + alpha = beta + end if + return + end subroutine stdlib_zlarfgp + + !> ZLARFT: forms the triangular factor T of a complex block reflector H + !> of order n, which is defined as a product of k elementary reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + + pure subroutine stdlib_zlarft( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(dp), intent(out) :: t(ldt,*) + complex(dp), intent(in) :: tau(*), v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, prevlastv, lastv + ! Executable Statements + ! quick return if possible + if( n==0 )return + if( stdlib_lsame( direct, 'F' ) ) then + prevlastv = n + do i = 1, k + prevlastv = max( prevlastv, i ) + if( tau( i )==czero ) then + ! h(i) = i + do j = 1, i + t( j, i ) = czero + end do + else + ! general case + if( stdlib_lsame( storev, 'C' ) ) then + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( lastv, i )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * conjg( v( i , j ) ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**h * v(i:j,i) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', j-i, i-1,-tau( i ), v( i+1, 1 ), & + ldv,v( i+1, i ), 1, cone, t( 1, i ), 1 ) + else + ! skip any trailing zeros. + do lastv = n, i+1, -1 + if( v( i, lastv )/=czero ) exit + end do + do j = 1, i-1 + t( j, i ) = -tau( i ) * v( j , i ) + end do + j = min( lastv, prevlastv ) + ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**h + call stdlib_zgemm( 'N', 'C', i-1, 1, j-i, -tau( i ),v( 1, i+1 ), ldv, v( i,& + i+1 ), ldv,cone, t( 1, i ), ldt ) + end if + ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i) + call stdlib_ztrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', i-1, t,ldt, t( 1, i ),& + 1 ) + t( i, i ) = tau( i ) + if( i>1 ) then + prevlastv = max( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + end do + else + prevlastv = 1 + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i1 ) then + prevlastv = min( prevlastv, lastv ) + else + prevlastv = lastv + end if + end if + t( i, i ) = tau( i ) + end if + end do + end if + return + end subroutine stdlib_zlarft + + !> ZLARFX: applies a complex elementary reflector H to a complex m by n + !> matrix C, from either the left or the right. H is represented in the + !> form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix + !> This version uses inline code if H has order < 11. + + pure subroutine stdlib_zlarfx( side, m, n, v, tau, c, ldc, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: ldc, m, n + complex(dp), intent(in) :: tau + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: v(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j + complex(dp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, & + v6, v7, v8, v9 + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( tau==czero )return + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c, where h has order m. + go to ( 10, 30, 50, 70, 90, 110, 130, 150,170, 190 )m + ! code for general m + call stdlib_zlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 10 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, n + c( 1, j ) = t1*c( 1, j ) + end do + go to 410 + 30 continue + ! special code for 2 x 2 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + end do + go to 410 + 50 continue + ! special code for 3 x 3 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + end do + go to 410 + 70 continue + ! special code for 4 x 4 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + end do + go to 410 + 90 continue + ! special code for 5 x 5 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + end do + go to 410 + 110 continue + ! special code for 6 x 6 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + end do + go to 410 + 130 continue + ! special code for 7 x 7 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + end do + go to 410 + 150 continue + ! special code for 8 x 8 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + end do + go to 410 + 170 continue + ! special code for 9 x 9 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + end do + go to 410 + 190 continue + ! special code for 10 x 10 householder + v1 = conjg( v( 1 ) ) + t1 = tau*conjg( v1 ) + v2 = conjg( v( 2 ) ) + t2 = tau*conjg( v2 ) + v3 = conjg( v( 3 ) ) + t3 = tau*conjg( v3 ) + v4 = conjg( v( 4 ) ) + t4 = tau*conjg( v4 ) + v5 = conjg( v( 5 ) ) + t5 = tau*conjg( v5 ) + v6 = conjg( v( 6 ) ) + t6 = tau*conjg( v6 ) + v7 = conjg( v( 7 ) ) + t7 = tau*conjg( v7 ) + v8 = conjg( v( 8 ) ) + t8 = tau*conjg( v8 ) + v9 = conjg( v( 9 ) ) + t9 = tau*conjg( v9 ) + v10 = conjg( v( 10 ) ) + t10 = tau*conjg( v10 ) + do j = 1, n + sum = v1*c( 1, j ) + v2*c( 2, j ) + v3*c( 3, j ) +v4*c( 4, j ) + v5*c( 5, j ) + & + v6*c( 6, j ) +v7*c( 7, j ) + v8*c( 8, j ) + v9*c( 9, j ) +v10*c( 10, j ) + c( 1, j ) = c( 1, j ) - sum*t1 + c( 2, j ) = c( 2, j ) - sum*t2 + c( 3, j ) = c( 3, j ) - sum*t3 + c( 4, j ) = c( 4, j ) - sum*t4 + c( 5, j ) = c( 5, j ) - sum*t5 + c( 6, j ) = c( 6, j ) - sum*t6 + c( 7, j ) = c( 7, j ) - sum*t7 + c( 8, j ) = c( 8, j ) - sum*t8 + c( 9, j ) = c( 9, j ) - sum*t9 + c( 10, j ) = c( 10, j ) - sum*t10 + end do + go to 410 + else + ! form c * h, where h has order n. + go to ( 210, 230, 250, 270, 290, 310, 330, 350,370, 390 )n + ! code for general n + call stdlib_zlarf( side, m, n, v, 1, tau, c, ldc, work ) + go to 410 + 210 continue + ! special code for 1 x 1 householder + t1 = cone - tau*v( 1 )*conjg( v( 1 ) ) + do j = 1, m + c( j, 1 ) = t1*c( j, 1 ) + end do + go to 410 + 230 continue + ! special code for 2 x 2 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + end do + go to 410 + 250 continue + ! special code for 3 x 3 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + end do + go to 410 + 270 continue + ! special code for 4 x 4 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + end do + go to 410 + 290 continue + ! special code for 5 x 5 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + end do + go to 410 + 310 continue + ! special code for 6 x 6 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + end do + go to 410 + 330 continue + ! special code for 7 x 7 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + end do + go to 410 + 350 continue + ! special code for 8 x 8 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + end do + go to 410 + 370 continue + ! special code for 9 x 9 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + end do + go to 410 + 390 continue + ! special code for 10 x 10 householder + v1 = v( 1 ) + t1 = tau*conjg( v1 ) + v2 = v( 2 ) + t2 = tau*conjg( v2 ) + v3 = v( 3 ) + t3 = tau*conjg( v3 ) + v4 = v( 4 ) + t4 = tau*conjg( v4 ) + v5 = v( 5 ) + t5 = tau*conjg( v5 ) + v6 = v( 6 ) + t6 = tau*conjg( v6 ) + v7 = v( 7 ) + t7 = tau*conjg( v7 ) + v8 = v( 8 ) + t8 = tau*conjg( v8 ) + v9 = v( 9 ) + t9 = tau*conjg( v9 ) + v10 = v( 10 ) + t10 = tau*conjg( v10 ) + do j = 1, m + sum = v1*c( j, 1 ) + v2*c( j, 2 ) + v3*c( j, 3 ) +v4*c( j, 4 ) + v5*c( j, 5 ) + & + v6*c( j, 6 ) +v7*c( j, 7 ) + v8*c( j, 8 ) + v9*c( j, 9 ) +v10*c( j, 10 ) + c( j, 1 ) = c( j, 1 ) - sum*t1 + c( j, 2 ) = c( j, 2 ) - sum*t2 + c( j, 3 ) = c( j, 3 ) - sum*t3 + c( j, 4 ) = c( j, 4 ) - sum*t4 + c( j, 5 ) = c( j, 5 ) - sum*t5 + c( j, 6 ) = c( j, 6 ) - sum*t6 + c( j, 7 ) = c( j, 7 ) - sum*t7 + c( j, 8 ) = c( j, 8 ) - sum*t8 + c( j, 9 ) = c( j, 9 ) - sum*t9 + c( j, 10 ) = c( j, 10 ) - sum*t10 + end do + go to 410 + end if + 410 continue + return + end subroutine stdlib_zlarfx + + !> ZLARFY: applies an elementary reflector, or Householder matrix, H, + !> to an n x n Hermitian matrix C, from both the left and the right. + !> H is represented in the form + !> H = I - tau * v * v' + !> where tau is a scalar and v is a vector. + !> If tau is zero, then H is taken to be the unit matrix. + + pure subroutine stdlib_zlarfy( uplo, n, v, incv, tau, c, ldc, work ) + ! -- lapack test routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incv, ldc, n + complex(dp), intent(in) :: tau + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: v(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: alpha + ! Executable Statements + if( tau==czero )return + ! form w:= c * v + call stdlib_zhemv( uplo, n, cone, c, ldc, v, incv, czero, work, 1 ) + alpha = -chalf*tau*stdlib_zdotc( n, work, 1, v, incv ) + call stdlib_zaxpy( n, alpha, v, incv, work, 1 ) + ! c := c - v * w' - w * v' + call stdlib_zher2( uplo, n, -tau, v, incv, work, 1, c, ldc ) + return + end subroutine stdlib_zlarfy + + !> ZLARNV: returns a vector of n random complex numbers from a uniform or + !> normal distribution. + + pure subroutine stdlib_zlarnv( idist, iseed, n, x ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: idist, n + ! Array Arguments + integer(ilp), intent(inout) :: iseed(4) + complex(dp), intent(out) :: x(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: lv = 128 + real(dp), parameter :: twopi = 6.28318530717958647692528676655900576839e+0_dp + + + + ! Local Scalars + integer(ilp) :: i, il, iv + ! Local Arrays + real(dp) :: u(lv) + ! Intrinsic Functions + intrinsic :: cmplx,exp,log,min,sqrt + ! Executable Statements + do 60 iv = 1, n, lv / 2 + il = min( lv / 2, n-iv+1 ) + ! call stdlib_dlaruv to generate 2*il realnumbers from a uniform (0,1,KIND=dp) + ! distribution (2*il <= lv) + call stdlib_dlaruv( iseed, 2*il, u ) + if( idist==1 ) then + ! copy generated numbers + do i = 1, il + x( iv+i-1 ) = cmplx( u( 2*i-1 ), u( 2*i ),KIND=dp) + end do + else if( idist==2 ) then + ! convert generated numbers to uniform (-1,1) distribution + do i = 1, il + x( iv+i-1 ) = cmplx( two*u( 2*i-1 )-one,two*u( 2*i )-one,KIND=dp) + end do + else if( idist==3 ) then + ! convert generated numbers to normal (0,1) distribution + do i = 1, il + x( iv+i-1 ) = sqrt( -two*log( u( 2*i-1 ) ) )*exp( cmplx( zero, twopi*u( 2*i ),& + KIND=dp) ) + end do + else if( idist==4 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit disk + do i = 1, il + x( iv+i-1 ) = sqrt( u( 2*i-1 ) )*exp( cmplx( zero, twopi*u( 2*i ),KIND=dp) ) + + end do + else if( idist==5 ) then + ! convert generated numbers to complex numbers uniformly + ! distributed on the unit circle + do i = 1, il + x( iv+i-1 ) = exp( cmplx( zero, twopi*u( 2*i ),KIND=dp) ) + end do + end if + 60 continue + return + end subroutine stdlib_zlarnv + + !> ! + !> + !> ZLARTG: generates a plane rotation so that + !> [ C S ] . [ F ] = [ R ] + !> [ -conjg(S) C ] [ G ] [ 0 ] + !> where C is real and C**2 + |S|**2 = 1. + !> The mathematical formulas used for C and S are + !> sgn(x) = { x / |x|, x != 0 + !> { 1, x = 0 + !> R = sgn(F) * sqrt(|F|**2 + |G|**2) + !> C = |F| / sqrt(|F|**2 + |G|**2) + !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) + !> When F and G are real, the formulas simplify to C = F/R and + !> S = G/R, and the returned values of C, S, and R should be + !> identical to those returned by DLARTG. + !> The algorithm used to compute these quantities incorporates scaling + !> to avoid overflow or underflow in computing the square root of the + !> sum of squares. + !> This is a faster version of the BLAS1 routine ZROTG, except for + !> the following differences: + !> F and G are unchanged on return. + !> If G=0, then C=1 and S=0. + !> If F=0, then C=0 and S is chosen so that R is real. + !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. + + pure subroutine stdlib_zlartg( f, g, c, s, r ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! february 2021 + ! Scalar Arguments + real(dp), intent(out) :: c + complex(dp), intent(in) :: f, g + complex(dp), intent(out) :: r, s + ! Local Scalars + real(dp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + complex(dp) :: fs, gs, t + ! Intrinsic Functions + intrinsic :: abs,aimag,conjg,max,min,real,sqrt + ! Statement Functions + real(dp) :: abssq + ! Statement Function Definitions + abssq( t ) = real( t,KIND=dp)**2 + aimag( t )**2 + ! Executable Statements + if( g == czero ) then + c = one + s = czero + r = f + else if( f == czero ) then + c = zero + g1 = max( abs(real(g,KIND=dp)), abs(aimag(g)) ) + if( g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + g2 = abssq( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else + ! use scaled algorithm + u = min( safmax, max( safmin, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if + else + f1 = max( abs(real(f,KIND=dp)), abs(aimag(f)) ) + g1 = max( abs(real(g,KIND=dp)), abs(aimag(g)) ) + if( f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax ) then + ! use unscaled algorithm + f2 = abssq( f ) + g2 = abssq( g ) + h2 = f2 + g2 + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = f2*p + s = conjg( g )*( f*p ) + r = f*( h2*p ) + else + ! use scaled algorithm + u = min( safmax, max( safmin, f1, g1 ) ) + uu = one / u + gs = g*uu + g2 = abssq( gs ) + if( f1*uu < rtmin ) then + ! f is not well-scaled when scaled by g1. + ! use a different scaling for f. + v = min( safmax, max( safmin, f1 ) ) + vv = one / v + w = v * uu + fs = f*vv + f2 = abssq( fs ) + h2 = f2*w**2 + g2 + else + ! otherwise use the same scaling for f and g. + w = one + fs = f*uu + f2 = abssq( fs ) + h2 = f2 + g2 + end if + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + p = 1 / d + c = ( f2*p )*w + s = conjg( gs )*( fs*p ) + r = ( fs*( h2*p ) )*u + end if + end if + return + end subroutine stdlib_zlartg + + !> ZLARTV: applies a vector of complex plane rotations with real cosines + !> to elements of the complex vectors x and y. For i = 1,2,...,n + !> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) + !> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) + + pure subroutine stdlib_zlartv( n, x, incx, y, incy, c, s, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(dp), intent(in) :: c(*) + complex(dp), intent(in) :: s(*) + complex(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ic, ix, iy + complex(dp) :: xi, yi + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ix = 1 + iy = 1 + ic = 1 + do i = 1, n + xi = x( ix ) + yi = y( iy ) + x( ix ) = c( ic )*xi + s( ic )*yi + y( iy ) = c( ic )*yi - conjg( s( ic ) )*xi + ix = ix + incx + iy = iy + incy + ic = ic + incc + end do + return + end subroutine stdlib_zlartv + + !> ZLARZ: applies a complex elementary reflector H to a complex + !> M-by-N matrix C, from either the left or the right. H is represented + !> in the form + !> H = I - tau * v * v**H + !> where tau is a complex scalar and v is a complex vector. + !> If tau = 0, then H is taken to be the unit matrix. + !> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead + !> tau. + !> H is a product of k elementary reflectors as returned by ZTZRZF. + + pure subroutine stdlib_zlarz( side, m, n, l, v, incv, tau, c, ldc, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side + integer(ilp), intent(in) :: incv, l, ldc, m, n + complex(dp), intent(in) :: tau + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(in) :: v(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Executable Statements + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c + if( tau/=czero ) then + ! w( 1:n ) = conjg( c( 1, 1:n ) ) + call stdlib_zcopy( n, c, ldc, work, 1 ) + call stdlib_zlacgv( n, work, 1 ) + ! w( 1:n ) = conjg( w( 1:n ) + c( m-l+1:m, 1:n )**h * v( 1:l ) ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', l, n, cone, c( m-l+1, 1 ),ldc, v, incv,& + cone, work, 1 ) + call stdlib_zlacgv( n, work, 1 ) + ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n ) + call stdlib_zaxpy( n, -tau, work, 1, c, ldc ) + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! tau * v( 1:l ) * w( 1:n )**h + call stdlib_zgeru( l, n, -tau, v, incv, work, 1, c( m-l+1, 1 ),ldc ) + end if + else + ! form c * h + if( tau/=czero ) then + ! w( 1:m ) = c( 1:m, 1 ) + call stdlib_zcopy( m, c, 1, work, 1 ) + ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l ) + call stdlib_zgemv( 'NO TRANSPOSE', m, l, cone, c( 1, n-l+1 ), ldc,v, incv, cone, & + work, 1 ) + ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m ) + call stdlib_zaxpy( m, -tau, work, 1, c, 1 ) + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! tau * w( 1:m ) * v( 1:l )**h + call stdlib_zgerc( m, l, -tau, work, 1, v, incv, c( 1, n-l+1 ),ldc ) + end if + end if + return + end subroutine stdlib_zlarz + + !> ZLARZB: applies a complex block reflector H or its transpose H**H + !> to a complex distributed M-by-N C from the left or the right. + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_zlarzb( side, trans, direct, storev, m, n, k, l, v,ldv, t, ldt, c, & + ldc, work, ldwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, ldc, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: c(ldc,*), t(ldt,*), v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + character :: transt + integer(ilp) :: i, info, j + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -3 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLARZB', -info ) + return + end if + if( stdlib_lsame( trans, 'N' ) ) then + transt = 'C' + else + transt = 'N' + end if + if( stdlib_lsame( side, 'L' ) ) then + ! form h * c or h**h * c + ! w( 1:n, 1:k ) = c( 1:k, 1:n )**h + do j = 1, k + call stdlib_zcopy( n, c( j, 1 ), ldc, work( 1, j ), 1 ) + end do + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ... + ! c( m-l+1:m, 1:n )**h * v( 1:k, 1:l )**t + if( l>0 )call stdlib_zgemm( 'TRANSPOSE', 'CONJUGATE TRANSPOSE', n, k, l,cone, c( m-& + l+1, 1 ), ldc, v, ldv, cone, work,ldwork ) + ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t or w( 1:m, 1:k ) * t + call stdlib_ztrmm( 'RIGHT', 'LOWER', transt, 'NON-UNIT', n, k, cone, t,ldt, work, & + ldwork ) + ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**h + do j = 1, n + do i = 1, k + c( i, j ) = c( i, j ) - work( j, i ) + end do + end do + ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ... + ! v( 1:k, 1:l )**h * w( 1:n, 1:k )**h + if( l>0 )call stdlib_zgemm( 'TRANSPOSE', 'TRANSPOSE', l, n, k, -cone, v, ldv,work, & + ldwork, cone, c( m-l+1, 1 ), ldc ) + else if( stdlib_lsame( side, 'R' ) ) then + ! form c * h or c * h**h + ! w( 1:m, 1:k ) = c( 1:m, 1:k ) + do j = 1, k + call stdlib_zcopy( m, c( 1, j ), 1, work( 1, j ), 1 ) + end do + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ... + ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**h + if( l>0 )call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', m, k, l, cone,c( 1, n-l+1 )& + , ldc, v, ldv, cone, work, ldwork ) + ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * conjg( t ) or + ! w( 1:m, 1:k ) * t**h + do j = 1, k + call stdlib_zlacgv( k-j+1, t( j, j ), 1 ) + end do + call stdlib_ztrmm( 'RIGHT', 'LOWER', trans, 'NON-UNIT', m, k, cone, t,ldt, work, & + ldwork ) + do j = 1, k + call stdlib_zlacgv( k-j+1, t( j, j ), 1 ) + end do + ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k ) + do j = 1, k + do i = 1, m + c( i, j ) = c( i, j ) - work( i, j ) + end do + end do + ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ... + ! w( 1:m, 1:k ) * conjg( v( 1:k, 1:l ) ) + do j = 1, l + call stdlib_zlacgv( k, v( 1, j ), 1 ) + end do + if( l>0 )call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m, l, k, -cone,work, & + ldwork, v, ldv, cone, c( 1, n-l+1 ), ldc ) + do j = 1, l + call stdlib_zlacgv( k, v( 1, j ), 1 ) + end do + end if + return + end subroutine stdlib_zlarzb + + !> ZLARZT: forms the triangular factor T of a complex block reflector + !> H of order > n, which is defined as a product of k elementary + !> reflectors. + !> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + !> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + !> If STOREV = 'C', the vector which defines the elementary reflector + !> H(i) is stored in the i-th column of the array V, and + !> H = I - V * T * V**H + !> If STOREV = 'R', the vector which defines the elementary reflector + !> H(i) is stored in the i-th row of the array V, and + !> H = I - V**H * T * V + !> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. + + pure subroutine stdlib_zlarzt( direct, storev, n, k, v, ldv, tau, t, ldt ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, storev + integer(ilp), intent(in) :: k, ldt, ldv, n + ! Array Arguments + complex(dp), intent(out) :: t(ldt,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(inout) :: v(ldv,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + ! Executable Statements + ! check for currently supported options + info = 0 + if( .not.stdlib_lsame( direct, 'B' ) ) then + info = -1 + else if( .not.stdlib_lsame( storev, 'R' ) ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLARZT', -info ) + return + end if + do i = k, 1, -1 + if( tau( i )==czero ) then + ! h(i) = i + do j = i, k + t( j, i ) = czero + end do + else + ! general case + if( i ZLASCL: multiplies the M by N complex matrix A by the real scalar + !> CTO/CFROM. This is done without over/underflow as long as the final + !> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + !> A may be full, upper triangular, lower triangular, upper Hessenberg, + !> or banded. + + pure subroutine stdlib_zlascl( type, kl, ku, cfrom, cto, m, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: type + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, lda, m, n + real(dp), intent(in) :: cfrom, cto + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: i, itype, j, k1, k2, k3, k4 + real(dp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( stdlib_lsame( type, 'G' ) ) then + itype = 0 + else if( stdlib_lsame( type, 'L' ) ) then + itype = 1 + else if( stdlib_lsame( type, 'U' ) ) then + itype = 2 + else if( stdlib_lsame( type, 'H' ) ) then + itype = 3 + else if( stdlib_lsame( type, 'B' ) ) then + itype = 4 + else if( stdlib_lsame( type, 'Q' ) ) then + itype = 5 + else if( stdlib_lsame( type, 'Z' ) ) then + itype = 6 + else + itype = -1 + end if + if( itype==-1 ) then + info = -1 + else if( cfrom==zero .or. stdlib_disnan(cfrom) ) then + info = -4 + else if( stdlib_disnan(cto) ) then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 .or. ( itype==4 .and. n/=m ) .or.( itype==5 .and. n/=m ) ) then + info = -7 + else if( itype<=3 .and. lda=4 ) then + if( kl<0 .or. kl>max( m-1, 0 ) ) then + info = -2 + else if( ku<0 .or. ku>max( n-1, 0 ) .or.( ( itype==4 .or. itype==5 ) .and. kl/=ku ) & + )then + info = -3 + else if( ( itype==4 .and. ldaabs( ctoc ) .and. ctoc/=zero ) then + mul = smlnum + done = .false. + cfromc = cfrom1 + else if( abs( cto1 )>abs( cfromc ) ) then + mul = bignum + done = .false. + ctoc = cto1 + else + mul = ctoc / cfromc + done = .true. + end if + end if + if( itype==0 ) then + ! full matrix + do j = 1, n + do i = 1, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==1 ) then + ! lower triangular matrix + do j = 1, n + do i = j, m + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==2 ) then + ! upper triangular matrix + do j = 1, n + do i = 1, min( j, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==3 ) then + ! upper hessenberg matrix + do j = 1, n + do i = 1, min( j+1, m ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==4 ) then + ! lower chalf of a symmetric band matrix + k3 = kl + 1 + k4 = n + 1 + do j = 1, n + do i = 1, min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==5 ) then + ! upper chalf of a symmetric band matrix + k1 = ku + 2 + k3 = ku + 1 + do j = 1, n + do i = max( k1-j, 1 ), k3 + a( i, j ) = a( i, j )*mul + end do + end do + else if( itype==6 ) then + ! band matrix + k1 = kl + ku + 2 + k2 = kl + 1 + k3 = 2*kl + ku + 1 + k4 = kl + ku + 1 + m + do j = 1, n + do i = max( k1-j, k2 ), min( k3, k4-j ) + a( i, j ) = a( i, j )*mul + end do + end do + end if + if( .not.done )go to 10 + return + end subroutine stdlib_zlascl + + !> ZLASET: initializes a 2-D array A to BETA on the diagonal and + !> ALPHA on the offdiagonals. + + pure subroutine stdlib_zlaset( uplo, m, n, alpha, beta, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, m, n + complex(dp), intent(in) :: alpha, beta + ! Array Arguments + complex(dp), intent(out) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + if( stdlib_lsame( uplo, 'U' ) ) then + ! set the diagonal to beta and the strictly upper triangular + ! part of the array to alpha. + do j = 2, n + do i = 1, min( j-1, m ) + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else if( stdlib_lsame( uplo, 'L' ) ) then + ! set the diagonal to beta and the strictly lower triangular + ! part of the array to alpha. + do j = 1, min( m, n ) + do i = j + 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( n, m ) + a( i, i ) = beta + end do + else + ! set the array to beta on the diagonal and alpha on the + ! offdiagonal. + do j = 1, n + do i = 1, m + a( i, j ) = alpha + end do + end do + do i = 1, min( m, n ) + a( i, i ) = beta + end do + end if + return + end subroutine stdlib_zlaset + + !> ZLASR: applies a sequence of real plane rotations to a complex matrix + !> A, from either the left or the right. + !> When SIDE = 'L', the transformation takes the form + !> A := P*A + !> and when SIDE = 'R', the transformation takes the form + !> A := A*P**T + !> where P is an orthogonal matrix consisting of a sequence of z plane + !> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + !> and P**T is the transpose of P. + !> When DIRECT = 'F' (Forward sequence), then + !> P = P(z-1) * ... * P(2) * P(1) + !> and when DIRECT = 'B' (Backward sequence), then + !> P = P(1) * P(2) * ... * P(z-1) + !> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + !> R(k) = ( c(k) s(k) ) + !> = ( -s(k) c(k) ). + !> When PIVOT = 'V' (Variable pivot), the rotation is performed + !> for the plane (k,k+1), i.e., P(k) has the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears as a rank-2 modification to the identity matrix in + !> rows and columns k and k+1. + !> When PIVOT = 'T' (Top pivot), the rotation is performed for the + !> plane (1,k+1), so P(k) has the form + !> P(k) = ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> where R(k) appears in rows and columns 1 and k+1. + !> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + !> performed for the plane (k,z), giving P(k) the form + !> P(k) = ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( c(k) s(k) ) + !> ( 1 ) + !> ( ... ) + !> ( 1 ) + !> ( -s(k) c(k) ) + !> where R(k) appears in rows and columns k and z. The rotations are + !> performed without ever forming P(k) explicitly. + + pure subroutine stdlib_zlasr( side, pivot, direct, m, n, c, s, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, pivot, side + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(in) :: c(*), s(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, j + real(dp) :: ctemp, stemp + complex(dp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + if( .not.( stdlib_lsame( side, 'L' ) .or. stdlib_lsame( side, 'R' ) ) ) then + info = 1 + else if( .not.( stdlib_lsame( pivot, 'V' ) .or. stdlib_lsame( pivot,'T' ) .or. & + stdlib_lsame( pivot, 'B' ) ) ) then + info = 2 + else if( .not.( stdlib_lsame( direct, 'F' ) .or. stdlib_lsame( direct, 'B' ) ) )& + then + info = 3 + else if( m<0 ) then + info = 4 + else if( n<0 ) then + info = 5 + else if( lda ! + !> + !> ZLASSQ: returns the values scl and smsq such that + !> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + !> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + !> assumed to be non-negative. + !> scale and sumsq must be supplied in SCALE and SUMSQ and + !> scl and smsq are overwritten on SCALE and SUMSQ respectively. + !> If scale * sqrt( sumsq ) > tbig then + !> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, + !> and if 0 < scale * sqrt( sumsq ) < tsml then + !> we require: scale <= sqrt( HUGE ) / ssml on entry, + !> where + !> tbig -- upper threshold for values whose square is representable; + !> sbig -- scaling constant for big numbers; \see la_constants.f90 + !> tsml -- lower threshold for values whose square is representable; + !> ssml -- scaling constant for small numbers; \see la_constants.f90 + !> and + !> TINY*EPS -- tiniest representable number; + !> HUGE -- biggest representable number. + + pure subroutine stdlib_zlassq( n, x, incx, scl, sumsq ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, n + real(dp), intent(inout) :: scl, sumsq + ! Array Arguments + complex(dp), intent(in) :: x(*) + ! Local Scalars + integer(ilp) :: i, ix + logical(lk) :: notbig + real(dp) :: abig, amed, asml, ax, ymax, ymin + ! quick return if possible + if( ieee_is_nan(scl) .or. ieee_is_nan(sumsq) ) return + if( sumsq == zero ) scl = one + if( scl == zero ) then + scl = one + sumsq = zero + end if + if (n <= 0) then + return + end if + ! compute the sum of squares in 3 accumulators: + ! abig -- sums of squares scaled down to avoid overflow + ! asml -- sums of squares scaled up to avoid underflow + ! amed -- sums of squares that do not require scaling + ! the thresholds and multipliers are + ! tbig -- values bigger than this are scaled down by sbig + ! tsml -- values smaller than this are scaled up by ssml + notbig = .true. + asml = zero + amed = zero + abig = zero + ix = 1 + if( incx < 0 ) ix = 1 - (n-1)*incx + do i = 1, n + ax = abs(real(x(ix),KIND=dp)) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ax = abs(aimag(x(ix))) + if (ax > tbig) then + abig = abig + (ax*sbig)**2 + notbig = .false. + else if (ax < tsml) then + if (notbig) asml = asml + (ax*ssml)**2 + else + amed = amed + ax**2 + end if + ix = ix + incx + end do + ! put the existing sum of squares into one of the accumulators + if( sumsq > zero ) then + ax = scl*sqrt( sumsq ) + if (ax > tbig) then + ! we assume scl >= sqrt( tiny*eps ) / sbig + abig = abig + (scl*sbig)**2 * sumsq + else if (ax < tsml) then + ! we assume scl <= sqrt( huge ) / ssml + if (notbig) asml = asml + (scl*ssml)**2 * sumsq + else + amed = amed + scl**2 * sumsq + end if + end if + ! combine abig and amed or amed and asml if more than one + ! accumulator was used. + if (abig > zero) then + ! combine abig and amed if abig > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + abig = abig + (amed*sbig)*sbig + end if + scl = one / sbig + sumsq = abig + else if (asml > zero) then + ! combine amed and asml if asml > 0. + if (amed > zero .or. ieee_is_nan(amed)) then + amed = sqrt(amed) + asml = sqrt(asml) / ssml + if (asml > amed) then + ymin = amed + ymax = asml + else + ymin = asml + ymax = amed + end if + scl = one + sumsq = ymax**2*( one + (ymin/ymax)**2 ) + else + scl = one / ssml + sumsq = asml + end if + else + ! otherwise all values are mid-range or zero + scl = one + sumsq = amed + end if + return + end subroutine stdlib_zlassq + + !> ZLASWP: performs a series of row interchanges on the matrix A. + !> One row interchange is initiated for each of rows K1 through K2 of A. + + pure subroutine stdlib_zlaswp( n, a, lda, k1, k2, ipiv, incx ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, k1, k2, lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32 + complex(dp) :: temp + ! Executable Statements + ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows + ! k1 through k2. + if( incx>0 ) then + ix0 = k1 + i1 = k1 + i2 = k2 + inc = 1 + else if( incx<0 ) then + ix0 = k1 + ( k1-k2 )*incx + i1 = k2 + i2 = k1 + inc = -1 + else + return + end if + n32 = ( n / 32 )*32 + if( n32/=0 ) then + do j = 1, n32, 32 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = j, j + 31 + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end do + end if + if( n32/=n ) then + n32 = n32 + 1 + ix = ix0 + do i = i1, i2, inc + ip = ipiv( ix ) + if( ip/=i ) then + do k = n32, n + temp = a( i, k ) + a( i, k ) = a( ip, k ) + a( ip, k ) = temp + end do + end if + ix = ix + incx + end do + end if + return + end subroutine stdlib_zlaswp + + !> ZLASYF: computes a partial factorization of a complex symmetric matrix + !> A using the Bunch-Kaufman diagonal pivoting method. The partial + !> factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> Note that U**T denotes the transpose of U. + !> ZLASYF is an auxiliary routine called by ZSYTRF. It uses blocked code + !> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or + !> A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlasyf( uplo, n, nb, kb, a, lda, ipiv, w, ldw, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw + real(dp) :: absakk, alpha, colmax, rowmax + complex(dp) :: d11, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + ! kw is the column of w which corresponds to column k of a + k = n + 10 continue + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column kw-1 of w and update it + call stdlib_zcopy( imax, a( 1, imax ), 1, w( 1, kw-1 ), 1 ) + call stdlib_zcopy( k-imax, a( imax, imax+1 ), lda,w( imax+1, kw-1 ), 1 ) + + if( k1 ) then + jmax = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, kw-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column kw-1 of w to column kw of w + call stdlib_zcopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 ) + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k - kstep + 1 + ! kkw is the column of w which corresponds to column kk of a + kkw = nb + kk - n + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kkw of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k-1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_zcopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + if( kp>1 )call stdlib_zcopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + ! interchange rows kk and kp in last k+1 to n columns of a + ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be + ! later overwritten). interchange rows kk and kp + ! in last kkw to nb columns of w. + if( k2 ) then + ! compose the columns of the inverse of 2-by-2 pivot + ! block d in the following way to reduce the number + ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by + ! this inverse + ! d**(-1) = ( d11 d21 )**(-1) = + ! ( d21 d22 ) + ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = + ! ( (-d21 ) ( d11 ) ) + ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * + ! * ( ( d22/d21 ) ( -1 ) ) = + ! ( ( -1 ) ( d11/d21 ) ) + ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) ( -1 ) ) = + ! ( ( -1 ) ( d22 ) ) + ! = 1/d21 * t * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + ! = d21 * ( ( d11 ) ( -1 ) ) + ! ( ( -1 ) ( d22 ) ) + d21 = w( k-1, kw ) + d11 = w( k, kw ) / d21 + d22 = w( k-1, kw-1 ) / d21 + t = cone / ( d11*d22-cone ) + d21 = t / d21 + ! update elements in columns a(k-1) and a(k) as + ! dot products of rows of ( w(kw-1) w(kw) ) and columns + ! of d**(-1) + do j = 1, k - 2 + a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) ) + a( j, k ) = d21*( d22*w( j, kw )-w( j, kw-1 ) ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb, n-k,-cone, a( 1, k+1 ), & + lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n looping backwards from k+1 to n + j = k + 1 + 60 continue + ! undo the interchanges (if any) of rows jj and jp at each + ! step j + ! (here, j is a diagonal index) + jj = j + jp = ipiv( j ) + if( jp<0 ) then + jp = -jp + ! (here, j is a diagonal index) + j = j + 1 + end if + ! (note: here, j is used to determine row length. length n-j+1 + ! of the rows to swap back doesn't include diagonal element) + j = j + 1 + if( jp/=jj .and. j<=n )call stdlib_zswap( n-j+1, a( jp, j ), lda, a( jj, j ), & + lda ) + if( j=nb .and. nbn )go to 90 + ! copy column k of a to column k of w and update it + call stdlib_zcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ), lda,w( k, 1 ), ldw,& + cone, w( k, k ), 1 ) + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! copy column imax to column k+1 of w and update it + call stdlib_zcopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 ) + call stdlib_zcopy( n-imax+1, a( imax, imax ), 1, w( imax, k+1 ),1 ) + call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( imax, & + 1 ), ldw, cone, w( k, k+1 ),1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_izamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( w( imax, k+1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + ! copy column k+1 of w to column k of w + call stdlib_zcopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 ) + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + ! ============================================================ + ! kk is the column of a where pivoting step stopped + kk = k + kstep - 1 + ! interchange rows and columns kp and kk. + ! updated column kp is already stored in column kk of w. + if( kp/=kk ) then + ! copy non-updated column kk to column kp of submatrix a + ! at step k. no need to copy element into column k + ! (or k and k+1 for 2-by-2 pivot) of a, since these columns + ! will be later overwritten. + a( kp, kp ) = a( kk, kk ) + call stdlib_zcopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),lda ) + if( kp1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + call stdlib_zswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw ) + end if + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k of w now holds + ! w(k) = l(k)*d(k), + ! where l(k) is the k-th column of l + ! store subdiag. elements of column l(k) + ! and 1-by-1 block d(k) in column k of a. + ! (note: diagonal element l(k,k) is a unit element + ! and not stored) + ! a(k,k) := d(k,k) = w(k,k) + ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k) + call stdlib_zcopy( n-k+1, w( k, k ), 1, a( k, k ), 1 ) + if( k=1 )call stdlib_zswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda ) + + if( j>1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_zlasyf + + !> ZLASYF_RK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman (rook) diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLASYF_RK is an auxiliary routine called by ZSYTRF_RK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlasyf_rk( uplo, n, nb, kb, a, lda, e, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*), w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, rowmax, sfmin, dtemp + complex(dp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! initialize the first entry of array e, where superdiagonal + ! elements of d are stored + e( 1 ) = czero + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_zcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_zscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy diagonal elements of d(k) to a, + ! copy superdiagonal element of d(k) to e(k) and + ! zero out superdiagonal entry of a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = czero + a( k, k ) = w( k, kw ) + e( k ) = w( k-1, kw ) + e( k-1 ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ),ldw, cone, a( 1, j ), lda ) + end do + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_zcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_izamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_zscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k ZLASYF_ROOK: computes a partial factorization of a complex symmetric + !> matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. The partial factorization has the form: + !> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: + !> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) + !> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' + !> ( L21 I ) ( 0 A22 ) ( 0 I ) + !> where the order of D is at most NB. The actual order is returned in + !> the argument KB, and is either NB or NB-1, or N if N <= NB. + !> ZLASYF_ROOK is an auxiliary routine called by ZSYTRF_ROOK. It uses + !> blocked code (calling Level 3 BLAS) to update the submatrix + !> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). + + pure subroutine stdlib_zlasyf_rook( uplo, n, nb, kb, a, lda, ipiv, w, ldw,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, kb + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: w(ldw,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: done + integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, & + ii + real(dp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(dp) :: d11, d12, d21, d22, r1, t, z + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt,aimag,real + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + info = 0 + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + ! compute machine safe minimum + sfmin = stdlib_dlamch( 'S' ) + if( stdlib_lsame( uplo, 'U' ) ) then + ! factorize the trailing columns of a using the upper triangle + ! of a and working backwards, and compute the matrix w = u12*d + ! for use in updating a11 + ! k is the main loop index, decreasing from n in steps of 1 or 2 + k = n + 10 continue + ! kw is the column of w which corresponds to column k of a + kw = nb + k - n + ! exit from loop + if( ( k<=n-nb+1 .and. nb1 ) then + imax = stdlib_izamax( k-1, w( 1, kw ), 1 ) + colmax = cabs1( w( imax, kw ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + call stdlib_zcopy( k, w( 1, kw ), 1, a( 1, k ), 1 ) + else + ! ============================================================ + ! test for interchange + ! equivalent to testing for absakk>=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, w( 1, kw-1 ), 1 ) + dtemp = cabs1( w( itemp, kw-1 ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.(cabs1( w( imax, kw-1 ) )1 ) then + if( cabs1( a( k, k ) )>=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_zscal( k-1, r1, a( 1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now + ! hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + if( k>2 ) then + ! store u(k) and u(k-1) in columns k and k-1 of a + d12 = w( k-1, kw ) + d11 = w( k, kw ) / d12 + d22 = w( k-1, kw-1 ) / d12 + t = cone / ( d11*d22-cone ) + do j = 1, k - 2 + a( j, k-1 ) = t*( (d11*w( j, kw-1 )-w( j, kw ) ) /d12 ) + a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /d12 ) + end do + end if + ! copy d(k) to a + a( k-1, k-1 ) = w( k-1, kw-1 ) + a( k-1, k ) = w( k-1, kw ) + a( k, k ) = w( k, kw ) + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 30 continue + ! update the upper triangle of a11 (= a(1:k,1:k)) as + ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t + ! computing blocks of nb columns at a time + do j = ( ( k-1 ) / nb )*nb + 1, 1, -nb + jb = min( nb, k-j+1 ) + ! update the upper triangle of the diagonal block + do jj = j, j + jb - 1 + call stdlib_zgemv( 'NO TRANSPOSE', jj-j+1, n-k, -cone,a( j, k+1 ), lda, w( jj,& + kw+1 ), ldw, cone,a( j, jj ), 1 ) + end do + ! update the rectangular superdiagonal block + if( j>=2 )call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE', j-1, jb,n-k, -cone, a( & + 1, k+1 ), lda, w( j, kw+1 ), ldw,cone, a( 1, j ), lda ) + end do + ! put u12 in standard form by partially undoing the interchanges + ! in columns k+1:n + j = k + 1 + 60 continue + kstep = 1 + jp1 = 1 + jj = j + jp2 = ipiv( j ) + if( jp2<0 ) then + jp2 = -jp2 + j = j + 1 + jp1 = -ipiv( j ) + kstep = 2 + end if + j = j + 1 + if( jp2/=jj .and. j<=n )call stdlib_zswap( n-j+1, a( jp2, j ), lda, a( jj, j ), & + lda ) + jj = j - 1 + if( jp1/=jj .and. kstep==2 )call stdlib_zswap( n-j+1, a( jp1, j ), lda, a( jj, j & + ), lda ) + if( j<=n )go to 60 + ! set kb to the number of columns factorized + kb = n - k + else + ! factorize the leading columns of a using the lower triangle + ! of a and working forwards, and compute the matrix w = l21*d + ! for use in updating a22 + ! k is the main loop index, increasing from 1 in steps of 1 or 2 + k = 1 + 70 continue + ! exit from loop + if( ( k>=nb .and. nbn )go to 90 + kstep = 1 + p = k + ! copy column k of a to column k of w and update it + call stdlib_zcopy( n-k+1, a( k, k ), 1, w( k, k ), 1 ) + if( k>1 )call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone, a( k, 1 ),lda, w( k, & + 1 ), ldw, cone, w( k, k ), 1 ) + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( w( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + ! (used to handle nan and inf) + if( .not.( absakk1 )call stdlib_zgemv( 'NO TRANSPOSE', n-k+1, k-1, -cone,a( k, 1 ), & + lda, w( imax, 1 ), ldw,cone, w( k, k+1 ), 1 ) + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value. + ! determine both rowmax and jmax. + if( imax/=k ) then + jmax = k - 1 + stdlib_izamax( imax-k, w( k, k+1 ), 1 ) + rowmax = cabs1( w( jmax, k+1 ) ) + else + rowmax = zero + end if + if( imaxrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for + ! cabs1( w( imax, k+1 ) )>=alpha*rowmax + ! (used to handle nan and inf) + if( .not.( cabs1( w( imax, k+1 ) )=sfmin ) then + r1 = cone / a( k, k ) + call stdlib_zscal( n-k, r1, a( k+1, k ), 1 ) + else if( a( k, k )/=czero ) then + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / a( k, k ) + end do + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + if( k=1 )call stdlib_zswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda ) + + jj = j + 1 + if( jp1/=jj .and. kstep==2 )call stdlib_zswap( j, a( jp1, 1 ), lda, a( jj, 1 ), & + lda ) + if( j>=1 )go to 120 + ! set kb to the number of columns factorized + kb = k - 1 + end if + return + end subroutine stdlib_zlasyf_rook + + !> ZLAT2C: converts a COMPLEX*16 triangular matrix, SA, to a COMPLEX + !> triangular matrix, A. + !> RMAX is the overflow for the SINGLE PRECISION arithmetic + !> ZLAT2C checks that all the entries of A are between -RMAX and + !> RMAX. If not the conversion is aborted and a flag is raised. + !> This is an auxiliary routine so there is no argument checking. + + pure subroutine stdlib_zlat2c( uplo, n, a, lda, sa, ldsa, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldsa, n + ! Array Arguments + complex(sp), intent(out) :: sa(ldsa,*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: rmax + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,aimag + ! Executable Statements + rmax = stdlib_slamch( 'O' ) + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + do j = 1, n + do i = 1, j + if( ( real( a( i, j ),KIND=dp)<-rmax ) .or.( real( a( i, j ),KIND=dp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) & + then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + else + do j = 1, n + do i = j, n + if( ( real( a( i, j ),KIND=dp)<-rmax ) .or.( real( a( i, j ),KIND=dp)>rmax ) & + .or.( aimag( a( i, j ) )<-rmax ) .or.( aimag( a( i, j ) )>rmax ) ) & + then + info = 1 + go to 50 + end if + sa( i, j ) = a( i, j ) + end do + end do + end if + 50 continue + return + end subroutine stdlib_zlat2c + + !> ZLATBS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular band matrix. Here A**T denotes the transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine ZTBSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_zlatbs( uplo, trans, diag, normin, n, kd, ab, ldab, x,scale, cnorm, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind + real(dp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(dp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(dp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=dp) / 2._dp ) +abs( aimag( zdum ) / 2._dp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( kd<0 ) then + info = -6 + else if( ldab0 ) then + cnorm( j ) = stdlib_dzasum( jlen, ab( 2, j ), 1 ) + else + cnorm( j ) = zero + end if + end do + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_idamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_dscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_ztbsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + maind = kd + 1 + else + jfirst = 1 + jlast = n + jinc = 1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + maind = kd + 1 + else + jfirst = n + jlast = 1 + jinc = -1 + maind = 1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ab( maind, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ztbsv( uplo, trans, diag, n, kd, ab, ldab, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_zdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_zdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - + ! x(j)* a(max(1,j-kd):j-1,j) + jlen = min( kd, j-1 ) + call stdlib_zaxpy( jlen, -x( j )*tscal,ab( kd+1-jlen, j ), 1, x( j-jlen & + ), 1 ) + i = stdlib_izamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else if( j0 )call stdlib_zaxpy( jlen, -x( j )*tscal, ab( 2, j ), 1,x( j+1 ),& + 1 ) + i = j + stdlib_izamax( n-j, x( j+1 ), 1 ) + xmax = cabs1( x( i ) ) + end if + end do loop_120 + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b + loop_170: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_zdotu( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( ab( kd+i-jlen, j )*uscal )*x( j-jlen-1+i ) + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( ab( i+1, j )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=dp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = ab( maind, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 160 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_170 + else + ! solve a**h * x = b + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( rec1 )csumj = stdlib_zdotc( jlen, ab( 2, j ), 1, x( j+1 ),1 ) + + end if + else + ! otherwise, use in-line code for the dot product. + if( upper ) then + jlen = min( kd, j-1 ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( kd+i-jlen, j ) )*uscal )*x( j-jlen-1+i ) + + end do + else + jlen = min( kd, n-j ) + do i = 1, jlen + csumj = csumj + ( conjg( ab( i+1, j ) )*uscal )*x( j+i ) + end do + end if + end if + if( uscal==cmplx( tscal,KIND=dp) ) then + ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j) + ! was not used to scale the dotproduct. + x( j ) = x( j ) - csumj + xj = cabs1( x( j ) ) + if( nounit ) then + ! compute x(j) = x(j) / a(j,j), scaling if necessary. + tjjs = conjg( ab( maind, j ) )*tscal + else + tjjs = tscal + if( tscal==one )go to 210 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_zlatbs + + !> ZLATPS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow, where A is an upper or lower + !> triangular matrix stored in packed form. Here A**T denotes the + !> transpose of A, A**H denotes the conjugate transpose of A, x and b + !> are n-element vectors, and s is a scaling factor, usually less than + !> or equal to 1, chosen so that the components of x will be less than + !> the overflow threshold. If the unscaled problem will not cause + !> overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A + !> is singular (A(j,j) = 0 for some j), then s is set to 0 and a + !> non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_zlatps( uplo, trans, diag, normin, n, ap, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen + real(dp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(dp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(dp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=dp) / 2._dp ) +abs( aimag( zdum ) / 2._dp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLATPS', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! determine machine dependent parameters to control overflow. + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + smlnum = smlnum / stdlib_dlamch( 'PRECISION' ) + bignum = one / smlnum + scale = one + if( stdlib_lsame( normin, 'N' ) ) then + ! compute the 1-norm of each column, not including the diagonal. + if( upper ) then + ! a is upper triangular. + ip = 1 + do j = 1, n + cnorm( j ) = stdlib_dzasum( j-1, ap( ip ), 1 ) + ip = ip + j + end do + else + ! a is lower triangular. + ip = 1 + do j = 1, n - 1 + cnorm( j ) = stdlib_dzasum( n-j, ap( ip+1 ), 1 ) + ip = ip + n - j + 1 + end do + cnorm( n ) = zero + end if + end if + ! scale the column norms by tscal if the maximum element in cnorm is + ! greater than bignum/2. + imax = stdlib_idamax( n, cnorm, 1 ) + tmax = cnorm( imax ) + if( tmax<=bignum*half ) then + tscal = one + else + tscal = half / ( smlnum*tmax ) + call stdlib_dscal( n, tscal, cnorm, 1 ) + end if + ! compute a bound on the computed solution vector to see if the + ! level 2 blas routine stdlib_ztpsv can be used. + xmax = zero + do j = 1, n + xmax = max( xmax, cabs2( x( j ) ) ) + end do + xbnd = xmax + if( notran ) then + ! compute the growth in a * x = b. + if( upper ) then + jfirst = n + jlast = 1 + jinc = -1 + else + jfirst = 1 + jlast = n + jinc = 1 + end if + if( tscal/=one ) then + grow = zero + go to 60 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, g(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = n + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + ip = ip + jinc*jlen + jlen = jlen - 1 + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = ap( ip ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + jlen = jlen + 1 + ip = ip + jinc*jlen + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ztpsv( uplo, trans, diag, n, ap, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_zdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + ip = jfirst*( jfirst+1 ) / 2 + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_zdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_zaxpy( j-1, -x( j )*tscal, ap( ip-j+1 ), 1, x,1 ) + i = stdlib_izamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + ip = ip - j + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = ap( ip )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_170 + else + ! solve a**h * x = b + ip = jfirst*( jfirst+1 ) / 2 + jlen = 1 + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( ap( ip ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + jlen = jlen + 1 + ip = ip + jinc*jlen + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_zlatps + + !> ZLATRD: reduces NB rows and columns of a complex Hermitian matrix A to + !> Hermitian tridiagonal form by a unitary similarity + !> transformation Q**H * A * Q, and returns the matrices V and W which are + !> needed to apply the transformation to the unreduced part of A. + !> If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a + !> matrix, of which the upper triangle is supplied; + !> if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a + !> matrix, of which the lower triangle is supplied. + !> This is an auxiliary routine called by ZHETRD. + + pure subroutine stdlib_zlatrd( uplo, n, nb, a, lda, e, tau, w, ldw ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: lda, ldw, n, nb + ! Array Arguments + real(dp), intent(out) :: e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), w(ldw,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iw + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: real,min + ! Executable Statements + ! quick return if possible + if( n<=0 )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! reduce last nb columns of upper triangle + loop_10: do i = n, n - nb + 1, -1 + iw = i - n + nb + if( i1 ) then + ! generate elementary reflector h(i) to annihilate + ! a(1:i-2,i) + alpha = a( i-1, i ) + call stdlib_zlarfg( i-1, alpha, a( 1, i ), 1, tau( i-1 ) ) + e( i-1 ) = real( alpha,KIND=dp) + a( i-1, i ) = cone + ! compute w(1:i-1,i) + call stdlib_zhemv( 'UPPER', i-1, cone, a, lda, a( 1, i ), 1,czero, w( 1, iw ),& + 1 ) + if( i ZLATRS: solves one of the triangular systems + !> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + !> with scaling to prevent overflow. Here A is an upper or lower + !> triangular matrix, A**T denotes the transpose of A, A**H denotes the + !> conjugate transpose of A, x and b are n-element vectors, and s is a + !> scaling factor, usually less than or equal to 1, chosen so that the + !> components of x will be less than the overflow threshold. If the + !> unscaled problem will not cause overflow, the Level 2 BLAS routine + !> ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + !> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + pure subroutine stdlib_zlatrs( uplo, trans, diag, normin, n, a, lda, x, scale,cnorm, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, normin, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: scale + ! Array Arguments + real(dp), intent(inout) :: cnorm(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran, nounit, upper + integer(ilp) :: i, imax, j, jfirst, jinc, jlast + real(dp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax + complex(dp) :: csumj, tjjs, uscal, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(dp) :: cabs1, cabs2 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + cabs2( zdum ) = abs( real( zdum,KIND=dp) / 2._dp ) +abs( aimag( zdum ) / 2._dp ) + + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + ! test the input parameters. + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( .not.stdlib_lsame( normin, 'Y' ) .and. .not.stdlib_lsame( normin, 'N' ) ) & + then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda=smlnum ) then + ! m(j) = g(j-1) / abs(a(j,j)) + xbnd = min( xbnd, min( one, tjj )*grow ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + if( tjj+cnorm( j )>=smlnum ) then + ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) ) + grow = grow*( tjj / ( tjj+cnorm( j ) ) ) + else + ! g(j) could overflow, set grow to 0. + grow = zero + end if + end do + grow = xbnd + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 60 + ! g(j) = g(j-1)*( 1 + cnorm(j) ) + grow = grow*( one / ( one+cnorm( j ) ) ) + end do + end if + 60 continue + else + ! compute the growth in a**t * x = b or a**h * x = b. + if( upper ) then + jfirst = 1 + jlast = n + jinc = 1 + else + jfirst = n + jlast = 1 + jinc = -1 + end if + if( tscal/=one ) then + grow = zero + go to 90 + end if + if( nounit ) then + ! a is non-unit triangular. + ! compute grow = 1/g(j) and xbnd = 1/m(j). + ! initially, m(0) = max{x(i), i=1,...,n}. + grow = half / max( xbnd, smlnum ) + xbnd = grow + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) ) + xj = one + cnorm( j ) + grow = min( grow, xbnd / xj ) + tjjs = a( j, j ) + tjj = cabs1( tjjs ) + if( tjj>=smlnum ) then + ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j)) + if( xj>tjj )xbnd = xbnd*( tjj / xj ) + else + ! m(j) could overflow, set xbnd to 0. + xbnd = zero + end if + end do + grow = min( grow, xbnd ) + else + ! a is unit triangular. + ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}. + grow = min( one, half / max( xbnd, smlnum ) ) + do j = jfirst, jlast, jinc + ! exit the loop if the growth factor is too small. + if( grow<=smlnum )go to 90 + ! g(j) = ( 1 + cnorm(j) )*g(j-1) + xj = one + cnorm( j ) + grow = grow / xj + end do + end if + 90 continue + end if + if( ( grow*tscal )>smlnum ) then + ! use the level 2 blas solve if the reciprocal of the bound on + ! elements of x is not too small. + call stdlib_ztrsv( uplo, trans, diag, n, a, lda, x, 1 ) + else + ! use a level 1 blas solve, scaling intermediate results. + if( xmax>bignum*half ) then + ! scale x so that its components are less than or equal to + ! bignum in absolute value. + scale = ( bignum*half ) / xmax + call stdlib_zdscal( n, scale, x, 1 ) + xmax = bignum + else + xmax = xmax*two + end if + if( notran ) then + ! solve a * x = b + loop_120: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) / a(j,j), scaling x if necessary. + xj = cabs1( x( j ) ) + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + if( tscal==one )go to 110 + end if + tjj = cabs1( tjjs ) + if( tjj>smlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/b(j). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum + ! to avoid overflow when dividing by a(j,j). + rec = ( tjj*bignum ) / xj + if( cnorm( j )>one ) then + ! scale by 1/cnorm(j) to avoid overflow when + ! multiplying x(j) times column j. + rec = rec / cnorm( j ) + end if + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + xj = cabs1( x( j ) ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0, and compute a solution to a*x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + xj = one + scale = zero + xmax = zero + end if + 110 continue + ! scale x if necessary to avoid overflow when adding a + ! multiple of column j of a. + if( xj>one ) then + rec = one / xj + if( cnorm( j )>( bignum-xmax )*rec ) then + ! scale x by 1/(2*abs(x(j))). + rec = rec*half + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + end if + else if( xj*cnorm( j )>( bignum-xmax ) ) then + ! scale x by 1/2. + call stdlib_zdscal( n, half, x, 1 ) + scale = scale*half + end if + if( upper ) then + if( j>1 ) then + ! compute the update + ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j) + call stdlib_zaxpy( j-1, -x( j )*tscal, a( 1, j ), 1, x,1 ) + i = stdlib_izamax( j-1, x, 1 ) + xmax = cabs1( x( i ) ) + end if + else + if( jj + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = a( j, j )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**t *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 160 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_170 + else + ! solve a**h * x = b + loop_220: do j = jfirst, jlast, jinc + ! compute x(j) = b(j) - sum a(k,j)*x(k). + ! k<>j + xj = cabs1( x( j ) ) + uscal = tscal + rec = one / max( xmax, one ) + if( cnorm( j )>( bignum-xj )*rec ) then + ! if x(j) could overflow, scale x by 1/(2*xmax). + rec = rec*half + if( nounit ) then + tjjs = conjg( a( j, j ) )*tscal + else + tjjs = tscal + end if + tjj = cabs1( tjjs ) + if( tjj>one ) then + ! divide by a(j,j) when scaling x if a(j,j) > 1. + rec = min( one, rec*tjj ) + uscal = stdlib_zladiv( uscal, tjjs ) + end if + if( recsmlnum ) then + ! abs(a(j,j)) > smlnum: + if( tjjtjj*bignum ) then + ! scale x by 1/abs(x(j)). + rec = one / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else if( tjj>zero ) then + ! 0 < abs(a(j,j)) <= smlnum: + if( xj>tjj*bignum ) then + ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum. + rec = ( tjj*bignum ) / xj + call stdlib_zdscal( n, rec, x, 1 ) + scale = scale*rec + xmax = xmax*rec + end if + x( j ) = stdlib_zladiv( x( j ), tjjs ) + else + ! a(j,j) = 0: set x(1:n) = 0, x(j) = 1, and + ! scale = 0 and compute a solution to a**h *x = 0. + do i = 1, n + x( i ) = zero + end do + x( j ) = one + scale = zero + xmax = zero + end if + 210 continue + else + ! compute x(j) := x(j) / a(j,j) - csumj if the dot + ! product has already been divided by 1/a(j,j). + x( j ) = stdlib_zladiv( x( j ), tjjs ) - csumj + end if + xmax = max( xmax, cabs1( x( j ) ) ) + end do loop_220 + end if + scale = scale / tscal + end if + ! scale the column norms by 1/tscal for return. + if( tscal/=one ) then + call stdlib_dscal( n, one / tscal, cnorm, 1 ) + end if + return + end subroutine stdlib_zlatrs + + !> ZLATRZ: factors the M-by-(M+L) complex upper trapezoidal matrix + !> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means + !> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary + !> matrix and, R and A1 are M-by-M upper triangular matrices. + + pure subroutine stdlib_zlatrz( m, n, l, a, lda, tau, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: l, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m==0 ) then + return + else if( m==n ) then + do i = 1, n + tau( i ) = czero + end do + return + end if + do i = m, 1, -1 + ! generate elementary reflector h(i) to annihilate + ! [ a(i,i) a(i,n-l+1:n) ] + call stdlib_zlacgv( l, a( i, n-l+1 ), lda ) + alpha = conjg( a( i, i ) ) + call stdlib_zlarfg( l+1, alpha, a( i, n-l+1 ), lda, tau( i ) ) + tau( i ) = conjg( tau( i ) ) + ! apply h(i) to a(1:i-1,i:n) from the right + call stdlib_zlarz( 'RIGHT', i-1, n-i+1, l, a( i, n-l+1 ), lda,conjg( tau( i ) ), a( & + 1, i ), lda, work ) + a( i, i ) = conjg( alpha ) + end do + return + end subroutine stdlib_zlatrz + + !> ZLAUNHR_COL_GETRFNP2: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is at + !> least one in absolute value (so that division-by-zero not + !> possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the recursive version of the LU factorization algorithm. + !> Denote A - S by B. The algorithm divides the matrix B into four + !> submatrices: + !> [ B11 | B12 ] where B11 is n1 by n1, + !> B = [ -----|----- ] B21 is (m-n1) by n1, + !> [ B21 | B22 ] B12 is n1 by n2, + !> B22 is (m-n1) by n2, + !> with n1 = min(m,n)/2, n2 = n-n1. + !> The subroutine calls itself to factor B11, solves for B21, + !> solves for B12, updates B22, then calls itself to factor B22. + !> For more details on the recursive LU algorithm, see [2]. + !> ZLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked + !> routine ZLAUNHR_COL_GETRFNP, which uses blocked code calling + !> Level 3 BLAS to update the submatrix. However, ZLAUNHR_COL_GETRFNP2 + !> is self-sufficient and can be used without ZLAUNHR_COL_GETRFNP. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + !> [2] "Recursion leads to automatic variable blocking for dense linear + !> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev., + !> vol. 41, no. 6, pp. 737-755, 1997. + + pure recursive subroutine stdlib_zlaunhr_col_getrfnp2( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*) + ! ===================================================================== + + + ! Local Scalars + real(dp) :: sfmin + integer(ilp) :: i, iinfo, n1, n2 + complex(dp) :: z + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,sign,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_zscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 2, m + a( i, 1 ) = a( i, 1 ) / a( 1, 1 ) + end do + end if + else + ! divide the matrix b into four submatrices + n1 = min( m, n ) / 2 + n2 = n-n1 + ! factor b11, recursive call + call stdlib_zlaunhr_col_getrfnp2( n1, n1, a, lda, d, iinfo ) + ! solve for b21 + call stdlib_ztrsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1 ), lda ) + + ! solve for b12 + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update b22, i.e. compute the schur complement + ! b22 := b22 - b21*b12 + call stdlib_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor b22, recursive call + call stdlib_zlaunhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo ) + + end if + return + end subroutine stdlib_zlaunhr_col_getrfnp2 + + !> ZLAUU2: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the unblocked form of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zlauu2( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + real(dp) :: aii + ! Intrinsic Functions + intrinsic :: real,cmplx,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLAUUM: computes the product U * U**H or L**H * L, where the triangular + !> factor U or L is stored in the upper or lower triangular part of + !> the array A. + !> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + !> overwriting the factor U in A. + !> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + !> overwriting the factor L in A. + !> This is the blocked form of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zlauum( uplo, n, a, lda, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ib, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_zlauu2( uplo, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute the product u * u**h. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', i-1, & + ib, cone, a( i, i ), lda,a( 1, i ), lda ) + call stdlib_zlauu2( 'UPPER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',i-1, ib, n-i-ib+1,& + cone, a( 1, i+ib ),lda, a( i, i+ib ), lda, cone, a( 1, i ),lda ) + call stdlib_zherk( 'UPPER', 'NO TRANSPOSE', ib, n-i-ib+1,one, a( i, i+ib ),& + lda, one, a( i, i ),lda ) + end if + end do + else + ! compute the product l**h * l. + do i = 1, n, nb + ib = min( nb, n-i+1 ) + call stdlib_ztrmm( 'LEFT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', ib, i-1,& + cone, a( i, i ), lda,a( i, 1 ), lda ) + call stdlib_zlauu2( 'LOWER', ib, a( i, i ), lda, info ) + if( i+ib<=n ) then + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', ib,i-1, n-i-ib+1,& + cone, a( i+ib, i ), lda,a( i+ib, 1 ), lda, cone, a( i, 1 ), lda ) + call stdlib_zherk( 'LOWER', 'CONJUGATE TRANSPOSE', ib,n-i-ib+1, one, a( i+& + ib, i ), lda, one,a( i, i ), lda ) + end if + end do + end if + end if + return + end subroutine stdlib_zlauum + + !> ZPBCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite band matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> ZPBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zpbcon( uplo, n, kd, ab, ldab, anorm, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(dp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab ZPBEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite band matrix A and reduce its condition + !> number (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_zpbequ( uplo, n, kd, ab, ldab, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(out) :: s(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab ZPBSTF: computes a split Cholesky factorization of a complex + !> Hermitian positive definite band matrix A. + !> This routine is designed to be used in conjunction with ZHBGST. + !> The factorization has the form A = S**H*S where S is a band matrix + !> of the same bandwidth as A and the following structure: + !> S = ( U ) + !> ( M L ) + !> where U is upper triangular of order m = (n+kd)/2, and L is lower + !> triangular of order n-m. + + pure subroutine stdlib_zpbstf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, km, m + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_zdscal( km, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_zlacgv( km, ab( kd, j+1 ), kld ) + call stdlib_zher( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_zlacgv( km, ab( kd, j+1 ), kld ) + end if + end do + else + ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m). + do j = n, m + 1, -1 + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=dp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( j-1, kd ) + ! compute elements j-km:j-1 of the j-th row and update the + ! trailing submatrix within the band. + call stdlib_zdscal( km, one / ajj, ab( km+1, j-km ), kld ) + call stdlib_zlacgv( km, ab( km+1, j-km ), kld ) + call stdlib_zher( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1, j-km ), kld ) + + call stdlib_zlacgv( km, ab( km+1, j-km ), kld ) + end do + ! factorize the updated submatrix a(1:m,1:m) as u**h*u. + do j = 1, m + ! compute s(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=dp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 50 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + km = min( kd, m-j ) + ! compute elements j+1:j+km of the j-th column and update the + ! trailing submatrix within the band. + if( km>0 ) then + call stdlib_zdscal( km, one / ajj, ab( 2, j ), 1 ) + call stdlib_zher( 'LOWER', km, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 50 continue + info = j + return + end subroutine stdlib_zpbstf + + !> ZPBTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix, U**H is the conjugate transpose + !> of U, and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zpbtf2( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, kld, kn + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldab0 ) then + call stdlib_zdscal( kn, one / ajj, ab( kd, j+1 ), kld ) + call stdlib_zlacgv( kn, ab( kd, j+1 ), kld ) + call stdlib_zher( 'UPPER', kn, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld ) + + call stdlib_zlacgv( kn, ab( kd, j+1 ), kld ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ab( 1, j ),KIND=dp) + if( ajj<=zero ) then + ab( 1, j ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ab( 1, j ) = ajj + ! compute elements j+1:j+kn of column j and update the + ! trailing submatrix within the band. + kn = min( kd, n-j ) + if( kn>0 ) then + call stdlib_zdscal( kn, one / ajj, ab( 2, j ), 1 ) + call stdlib_zher( 'LOWER', kn, -one, ab( 2, j ), 1,ab( 1, j+1 ), kld ) + end if + end do + end if + return + 30 continue + info = j + return + end subroutine stdlib_zpbtf2 + + !> ZPBTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite band matrix A using the Cholesky factorization + !> A = U**H *U or A = L*L**H computed by ZPBTRF. + + pure subroutine stdlib_zpbtrs( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab ZPOCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite matrix using the + !> Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zpocon( uplo, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(dp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_zpoequ( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(out) :: s(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( lda ZPOEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A and reduce its condition number + !> (with respect to the two-norm). S contains the scale factors, + !> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with + !> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This + !> choice of S puts the condition number of B within a factor N of the + !> smallest possible condition number over all possible diagonal + !> scalings. + !> This routine differs from ZPOEQU by restricting the scaling factors + !> to a power of the radix. Barring over- and underflow, scaling by + !> these factors introduces no additional rounding errors. However, the + !> scaled diagonal entries are no longer approximately 1 but lie + !> between sqrt(radix) and 1/sqrt(radix). + + pure subroutine stdlib_zpoequb( n, a, lda, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: smin, base, tmp + ! Intrinsic Functions + intrinsic :: max,min,sqrt,log,int,real,aimag + ! Executable Statements + ! test the input parameters. + ! positive definite only performs 1 pass of equilibration. + info = 0 + if( n<0 ) then + info = -1 + else if( lda ZPOTF2: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U , if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zpotf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: real,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOTRF2: computes the Cholesky factorization of a Hermitian + !> positive definite matrix A using the recursive algorithm. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = n/2 + !> [ A21 | A22 ] n2 = n-n1 + !> The subroutine calls itself to factor A11. Update and scale A21 + !> or A12, update A22 then call itself to factor A22. + + pure recursive subroutine stdlib_zpotrf2( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: n1, n2, iinfo + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: max,real,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZPOTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H * U or A = L * L**H computed by ZPOTRF. + + pure subroutine stdlib_zpotrs( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZPPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite packed matrix using + !> the Cholesky factorization A = U**H*U or A = L*L**H computed by + !> ZPPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zppcon( uplo, n, ap, anorm, rcond, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + character :: normin + integer(ilp) :: ix, kase + real(dp) :: ainvnm, scale, scalel, scaleu, smlnum + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm ZPPEQU: computes row and column scalings intended to equilibrate a + !> Hermitian positive definite matrix A in packed storage and reduce + !> its condition number (with respect to the two-norm). S contains the + !> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix + !> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. + !> This choice of S puts the condition number of B within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_zppequ( uplo, n, ap, s, scond, amax, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: amax, scond + ! Array Arguments + real(dp), intent(out) :: s(*) + complex(dp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, jj + real(dp) :: smin + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPEQU', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + scond = one + amax = zero + return + end if + ! initialize smin and amax. + s( 1 ) = real( ap( 1 ),KIND=dp) + smin = s( 1 ) + amax = s( 1 ) + if( upper ) then + ! uplo = 'u': upper triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + i + s( i ) = real( ap( jj ),KIND=dp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + else + ! uplo = 'l': lower triangle of a is stored. + ! find the minimum and maximum diagonal elements. + jj = 1 + do i = 2, n + jj = jj + n - i + 2 + s( i ) = real( ap( jj ),KIND=dp) + smin = min( smin, s( i ) ) + amax = max( amax, s( i ) ) + end do + end if + if( smin<=zero ) then + ! find the first non-positive diagonal element and return. + do i = 1, n + if( s( i )<=zero ) then + info = i + return + end if + end do + else + ! set the scale factors to the reciprocals + ! of the diagonal elements. + do i = 1, n + s( i ) = one / sqrt( s( i ) ) + end do + ! compute scond = min(s(i)) / max(s(i)) + scond = sqrt( smin ) / sqrt( amax ) + end if + return + end subroutine stdlib_zppequ + + !> ZPPTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A stored in packed format. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_zpptrf( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: real,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( upper ) then + ! compute the cholesky factorization a = u**h * u. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + ! compute elements 1:j-1 of column j. + if( j>1 )call stdlib_ztpsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',j-1, ap, & + ap( jc ), 1 ) + ! compute u(j,j) and test for non-positive-definiteness. + ajj = real( ap( jj ),KIND=dp) - real( stdlib_zdotc( j-1,ap( jc ), 1, ap( jc ), 1 & + ),KIND=dp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ap( jj ) = sqrt( ajj ) + end do + else + ! compute the cholesky factorization a = l * l**h. + jj = 1 + do j = 1, n + ! compute l(j,j) and test for non-positive-definiteness. + ajj = real( ap( jj ),KIND=dp) + if( ajj<=zero ) then + ap( jj ) = ajj + go to 30 + end if + ajj = sqrt( ajj ) + ap( jj ) = ajj + ! compute elements j+1:n of column j and update the trailing + ! submatrix. + if( j ZPPTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A in packed storage using the Cholesky + !> factorization A = U**H * U or A = L * L**H computed by ZPPTRF. + + pure subroutine stdlib_zpptrs( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZPSTF2: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 2 BLAS. + + pure subroutine stdlib_zpstf2( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: ztemp + real(dp) :: ajj, dstop, dtemp + integer(ilp) :: i, itemp, j, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,conjg,max,sqrt + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),KIND=dp) + + end if + work( n+i ) = real( a( i, i ),KIND=dp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_disnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_zswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvt1 ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),KIND=dp) + + end if + work( n+i ) = real( a( i, i ),KIND=dp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_disnan( ajj ) ) then + a( j, j ) = ajj + go to 190 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_zswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt ZPSTRF: computes the Cholesky factorization with complete + !> pivoting of a complex Hermitian positive semidefinite matrix A. + !> The factorization has the form + !> P**T * A * P = U**H * U , if UPLO = 'U', + !> P**T * A * P = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular, and + !> P is stored as vector PIV. + !> This algorithm does not attempt to check that A is positive + !> semidefinite. This version of the algorithm calls level 3 BLAS. + + pure subroutine stdlib_zpstrf( uplo, n, a, lda, piv, rank, tol, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: tol + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, n + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: work(2*n) + integer(ilp), intent(out) :: piv(n) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: ztemp + real(dp) :: ajj, dstop, dtemp + integer(ilp) :: i, itemp, j, jb, k, nb, pvt + logical(lk) :: upper + ! Intrinsic Functions + intrinsic :: real,conjg,max,min,sqrt,maxloc + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code + call stdlib_zpstf2( uplo, n, a( 1, 1 ), lda, piv, rank, tol, work,info ) + go to 230 + else + ! initialize piv + do i = 1, n + piv( i ) = i + end do + ! compute stopping value + do i = 1, n + work( i ) = real( a( i, i ),KIND=dp) + end do + pvt = maxloc( work( 1:n ), 1 ) + ajj = real( a( pvt, pvt ),KIND=dp) + if( ajj<=zero.or.stdlib_disnan( ajj ) ) then + rank = 0 + info = 1 + go to 230 + end if + ! compute stopping value if not supplied + if( tolk ) then + work( i ) = work( i ) +real( conjg( a( j-1, i ) )*a( j-1, i ),& + KIND=dp) + end if + work( n+i ) = real( a( i, i ),KIND=dp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_disnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_zswap( j-1, a( 1, j ), 1, a( 1, pvt ), 1 ) + if( pvtk ) then + work( i ) = work( i ) +real( conjg( a( i, j-1 ) )*a( i, j-1 ),& + KIND=dp) + end if + work( n+i ) = real( a( i, i ),KIND=dp) - work( i ) + end do + if( j>1 ) then + itemp = maxloc( work( (n+j):(2*n) ), 1 ) + pvt = itemp + j - 1 + ajj = work( n+pvt ) + if( ajj<=dstop.or.stdlib_disnan( ajj ) ) then + a( j, j ) = ajj + go to 220 + end if + end if + if( j/=pvt ) then + ! pivot ok, so can now swap pivot rows and columns + a( pvt, pvt ) = a( j, j ) + call stdlib_zswap( j-1, a( j, 1 ), lda, a( pvt, 1 ), lda ) + if( pvt ZPTCON: computes the reciprocal of the condition number (in the + !> 1-norm) of a complex Hermitian positive definite tridiagonal matrix + !> using the factorization A = L*D*L**H or A = U**H*D*U computed by + !> ZPTTRF. + !> Norm(inv(A)) is computed by a direct method, and the reciprocal of + !> the condition number is computed as + !> RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zptcon( n, d, e, anorm, rcond, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(in) :: d(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ix + real(dp) :: ainvnm + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input arguments. + info = 0 + if( n<0 ) then + info = -1 + else if( anorm ZPTTRF: computes the L*D*L**H factorization of a complex Hermitian + !> positive definite tridiagonal matrix A. The factorization may also + !> be regarded as having the form A = U**H *D*U. + + pure subroutine stdlib_zpttrf( n, d, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(inout) :: d(*) + complex(dp), intent(inout) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i4 + real(dp) :: eii, eir, f, g + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag,mod + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + call stdlib_xerbla( 'ZPTTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! compute the l*d*l**h (or u**h *d*u) factorization of a. + i4 = mod( n-1, 4 ) + do i = 1, i4 + if( d( i )<=zero ) then + info = i + go to 30 + end if + eir = real( e( i ),KIND=dp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=dp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + end do + loop_20: do i = i4 + 1, n - 4, 4 + ! drop out of the loop if d(i) <= 0: the matrix is not positive + ! definite. + if( d( i )<=zero ) then + info = i + go to 30 + end if + ! solve for e(i) and d(i+1). + eir = real( e( i ),KIND=dp) + eii = aimag( e( i ) ) + f = eir / d( i ) + g = eii / d( i ) + e( i ) = cmplx( f, g,KIND=dp) + d( i+1 ) = d( i+1 ) - f*eir - g*eii + if( d( i+1 )<=zero ) then + info = i + 1 + go to 30 + end if + ! solve for e(i+1) and d(i+2). + eir = real( e( i+1 ),KIND=dp) + eii = aimag( e( i+1 ) ) + f = eir / d( i+1 ) + g = eii / d( i+1 ) + e( i+1 ) = cmplx( f, g,KIND=dp) + d( i+2 ) = d( i+2 ) - f*eir - g*eii + if( d( i+2 )<=zero ) then + info = i + 2 + go to 30 + end if + ! solve for e(i+2) and d(i+3). + eir = real( e( i+2 ),KIND=dp) + eii = aimag( e( i+2 ) ) + f = eir / d( i+2 ) + g = eii / d( i+2 ) + e( i+2 ) = cmplx( f, g,KIND=dp) + d( i+3 ) = d( i+3 ) - f*eir - g*eii + if( d( i+3 )<=zero ) then + info = i + 3 + go to 30 + end if + ! solve for e(i+3) and d(i+4). + eir = real( e( i+3 ),KIND=dp) + eii = aimag( e( i+3 ) ) + f = eir / d( i+3 ) + g = eii / d( i+3 ) + e( i+3 ) = cmplx( f, g,KIND=dp) + d( i+4 ) = d( i+4 ) - f*eir - g*eii + end do loop_20 + ! check d(n) for positive definiteness. + if( d( n )<=zero )info = n + 30 continue + return + end subroutine stdlib_zpttrf + + !> ZPTTS2: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H *D*U or A = L*D*L**H computed by ZPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_zptts2( iuplo, n, nrhs, d, e, b, ldb ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: iuplo, ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: d(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, j + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + if( n==1 )call stdlib_zdscal( nrhs, 1._dp / d( 1 ), b, ldb ) + return + end if + if( iuplo==1 ) then + ! solve a * x = b using the factorization a = u**h *d*u, + ! overwriting each right hand side vector with its solution. + if( nrhs<=2 ) then + j = 1 + 10 continue + ! solve u**h * x = b. + do i = 2, n + b( i, j ) = b( i, j ) - b( i-1, j )*conjg( e( i-1 ) ) + end do + ! solve d * u * x = b. + do i = 1, n + b( i, j ) = b( i, j ) / d( i ) + end do + do i = n - 1, 1, -1 + b( i, j ) = b( i, j ) - b( i+1, j )*e( i ) + end do + if( j ZROT: applies a plane rotation, where the cos (C) is real and the + !> sin (S) is complex, and the vectors CX and CY are complex. + + pure subroutine stdlib_zrot( n, cx, incx, cy, incy, c, s ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(dp), intent(in) :: c + complex(dp), intent(in) :: s + ! Array Arguments + complex(dp), intent(inout) :: cx(*), cy(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ix, iy + complex(dp) :: stemp + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + if( n<=0 )return + if( incx==1 .and. incy==1 )go to 20 + ! code for unequal increments or equal increments not equal to 1 + ix = 1 + iy = 1 + if( incx<0 )ix = ( -n+1 )*incx + 1 + if( incy<0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + stemp = c*cx( ix ) + s*cy( iy ) + cy( iy ) = c*cy( iy ) - conjg( s )*cx( ix ) + cx( ix ) = stemp + ix = ix + incx + iy = iy + incy + end do + return + ! code for both increments equal to 1 + 20 continue + do i = 1, n + stemp = c*cx( i ) + s*cy( i ) + cy( i ) = c*cy( i ) - conjg( s )*cx( i ) + cx( i ) = stemp + end do + return + end subroutine stdlib_zrot + + !> ZSPMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_zspmv( uplo, n, alpha, ap, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, n + complex(dp), intent(in) :: alpha, beta + ! Array Arguments + complex(dp), intent(in) :: ap(*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky + complex(dp) :: temp1, temp2 + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 6 + else if( incy==0 ) then + info = 9 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPMV ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( ( alpha==czero ) .and. ( beta==cone ) ) )return + ! set up the start points in x and y. + if( incx>0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when ap contains the upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + k = kk + do i = 1, j - 1 + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + temp1*ap( kk+j-1 ) + alpha*temp2 + kk = kk + j + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do k = kk, kk + j - 2 + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*ap( kk+j-1 ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + j + end do + end if + else + ! form y when ap contains the lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*ap( kk ) + k = kk + 1 + do i = j + 1, n + y( i ) = y( i ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( i ) + k = k + 1 + end do + y( j ) = y( j ) + alpha*temp2 + kk = kk + ( n-j+1 ) + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*ap( kk ) + ix = jx + iy = jy + do k = kk + 1, kk + n - j + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*ap( k ) + temp2 = temp2 + ap( k )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + kk = kk + ( n-j+1 ) + end do + end if + end if + return + end subroutine stdlib_zspmv + + !> ZSPR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix, supplied in packed form. + + pure subroutine stdlib_zspr( uplo, n, alpha, x, incx, ap ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, n + complex(dp), intent(in) :: alpha + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, k, kk, kx + complex(dp) :: temp + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPR ', info ) + return + end if + ! quick return if possible. + if( ( n==0 ) .or. ( alpha==czero ) )return + ! set the start point in x if the increment is not unity. + if( incx<=0 ) then + kx = 1 - ( n-1 )*incx + else if( incx/=1 ) then + kx = 1 + end if + ! start the operations. in this version the elements of the array ap + ! are accessed sequentially with cone pass through ap. + kk = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! form a when upper triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + k = kk + do i = 1, j - 1 + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( j )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + kk = kk + j + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ix = kx + do k = kk, kk + j - 2 + ap( k ) = ap( k ) + x( ix )*temp + ix = ix + incx + end do + ap( kk+j-1 ) = ap( kk+j-1 ) + x( jx )*temp + else + ap( kk+j-1 ) = ap( kk+j-1 ) + end if + jx = jx + incx + kk = kk + j + end do + end if + else + ! form a when lower triangle is stored in ap. + if( incx==1 ) then + do j = 1, n + if( x( j )/=czero ) then + temp = alpha*x( j ) + ap( kk ) = ap( kk ) + temp*x( j ) + k = kk + 1 + do i = j + 1, n + ap( k ) = ap( k ) + x( i )*temp + k = k + 1 + end do + else + ap( kk ) = ap( kk ) + end if + kk = kk + n - j + 1 + end do + else + jx = kx + do j = 1, n + if( x( jx )/=czero ) then + temp = alpha*x( jx ) + ap( kk ) = ap( kk ) + temp*x( jx ) + ix = jx + do k = kk + 1, kk + n - j + ix = ix + incx + ap( k ) = ap( k ) + x( ix )*temp + end do + else + ap( kk ) = ap( kk ) + end if + jx = jx + incx + kk = kk + n - j + 1 + end do + end if + end if + return + end subroutine stdlib_zspr + + !> ZSPTRF: computes the factorization of a complex symmetric matrix A + !> stored in packed format using the Bunch-Kaufman diagonal pivoting + !> method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + + pure subroutine stdlib_zsptrf( uplo, n, ap, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp + real(dp) :: absakk, alpha, colmax, rowmax + complex(dp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPTRF', -info ) + return + end if + ! initialize alpha for use in choosing pivot block size. + alpha = ( one+sqrt( sevten ) ) / eight + if( upper ) then + ! factorize a as u*d*u**t using the upper triangle of a + ! k is the main loop index, decreasing from n to 1 in steps of + ! 1 or 2 + k = n + kc = ( n-1 )*n / 2 + 1 + 10 continue + knc = kc + ! if k < 1, exit from loop + if( k<1 )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc+k-1 ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k>1 ) then + imax = stdlib_izamax( k-1, ap( kc ), 1 ) + colmax = cabs1( ap( kc+imax-1 ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero ) then + ! column k is zero: set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + rowmax = zero + jmax = imax + kx = imax*( imax+1 ) / 2 + imax + do j = imax + 1, k + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + j + end do + kpc = ( imax-1 )*imax / 2 + 1 + if( imax>1 ) then + jmax = stdlib_izamax( imax-1, ap( kpc ), 1 ) + rowmax = max( rowmax, cabs1( ap( kpc+jmax-1 ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc+imax-1 ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kstep==2 )knc = knc - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_zswap( kp-1, ap( knc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, kk - 1 + kx = kx + j - 1 + t = ap( knc+j-1 ) + ap( knc+j-1 ) = ap( kx ) + ap( kx ) = t + end do + t = ap( knc+kk-1 ) + ap( knc+kk-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = t + if( kstep==2 ) then + t = ap( kc+k-2 ) + ap( kc+k-2 ) = ap( kc+kp-1 ) + ap( kc+kp-1 ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / ap( kc+k-1 ) + call stdlib_zspr( uplo, k-1, -r1, ap( kc ), 1, ap ) + ! store u(k) in column k + call stdlib_zscal( k-1, r1, ap( kc ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = ap( k-1+( k-1 )*k / 2 ) + d22 = ap( k-1+( k-2 )*( k-1 ) / 2 ) / d12 + d11 = ap( k+( k-1 )*k / 2 ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*ap( j+( k-2 )*( k-1 ) / 2 )-ap( j+( k-1 )*k / 2 ) ) + + wk = d12*( d22*ap( j+( k-1 )*k / 2 )-ap( j+( k-2 )*( k-1 ) / 2 ) ) + + do i = j, 1, -1 + ap( i+( j-1 )*j / 2 ) = ap( i+( j-1 )*j / 2 ) -ap( i+( k-1 )*k / 2 )& + *wk -ap( i+( k-2 )*( k-1 ) / 2 )*wkm1 + end do + ap( j+( k-1 )*k / 2 ) = wk + ap( j+( k-2 )*( k-1 ) / 2 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + kc = knc - k + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + kc = 1 + npp = n*( n+1 ) / 2 + 60 continue + knc = kc + ! if k > n, exit from loop + if( k>n )go to 110 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( ap( kc ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + rowmax = zero + kx = kc + imax - k + do j = k, imax - 1 + if( cabs1( ap( kx ) )>rowmax ) then + rowmax = cabs1( ap( kx ) ) + jmax = j + end if + kx = kx + n - j + end do + kpc = npp - ( n-imax+1 )*( n-imax+2 ) / 2 + 1 + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( ap( kpc ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kstep==2 )knc = knc + n - k + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZSPTRI: computes the inverse of a complex symmetric indefinite matrix + !> A in packed storage using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSPTRF. + + pure subroutine stdlib_zsptri( uplo, n, ap, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp + complex(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZSPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! check that the diagonal matrix d is nonsingular. + if( upper ) then + ! upper triangular storage: examine d from bottom to top + kp = n*( n+1 ) / 2 + do info = n, 1, -1 + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp - info + end do + else + ! lower triangular storage: examine d from top to bottom. + kp = 1 + do info = 1, n + if( ipiv( info )>0 .and. ap( kp )==czero )return + kp = kp + n - info + 1 + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + kcnext = kc + k + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc+k-1 ) = cone / ap( kc+k-1 ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_zspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_zdotu( k-1, work, 1, ap( kc ), 1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = ap( kcnext+k-1 ) + ak = ap( kc+k-1 ) / t + akp1 = ap( kcnext+k ) / t + akkp1 = ap( kcnext+k-1 ) / t + d = t*( ak*akp1-cone ) + ap( kc+k-1 ) = akp1 / d + ap( kcnext+k ) = ak / d + ap( kcnext+k-1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, ap( kc ), 1, work, 1 ) + call stdlib_zspmv( uplo, k-1, -cone, ap, work, 1, czero, ap( kc ),1 ) + ap( kc+k-1 ) = ap( kc+k-1 ) -stdlib_zdotu( k-1, work, 1, ap( kc ), 1 ) + ap( kcnext+k-1 ) = ap( kcnext+k-1 ) -stdlib_zdotu( k-1, ap( kc ), 1, ap( & + kcnext ),1 ) + call stdlib_zcopy( k-1, ap( kcnext ), 1, work, 1 ) + call stdlib_zspmv( uplo, k-1, -cone, ap, work, 1, czero,ap( kcnext ), 1 ) + + ap( kcnext+k ) = ap( kcnext+k ) -stdlib_zdotu( k-1, work, 1, ap( kcnext ), 1 ) + + end if + kstep = 2 + kcnext = kcnext + k + 1 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + kpc = ( kp-1 )*kp / 2 + 1 + call stdlib_zswap( kp-1, ap( kc ), 1, ap( kpc ), 1 ) + kx = kpc + kp - 1 + do j = kp + 1, k - 1 + kx = kx + j - 1 + temp = ap( kc+j-1 ) + ap( kc+j-1 ) = ap( kx ) + ap( kx ) = temp + end do + temp = ap( kc+k-1 ) + ap( kc+k-1 ) = ap( kpc+kp-1 ) + ap( kpc+kp-1 ) = temp + if( kstep==2 ) then + temp = ap( kc+k+k-1 ) + ap( kc+k+k-1 ) = ap( kc+k+kp-1 ) + ap( kc+k+kp-1 ) = temp + end if + end if + k = k + kstep + kc = kcnext + go to 30 + 50 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + npp = n*( n+1 ) / 2 + k = n + kc = npp + 60 continue + ! if k < 1, exit from loop. + if( k<1 )go to 80 + kcnext = kc - ( n-k+2 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + ap( kc ) = cone / ap( kc ) + ! compute column k of the inverse. + if( k ZSPTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A stored in packed format using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. + + pure subroutine stdlib_zsptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + call stdlib_zscal( nrhs, cone / ap( kc+k-1 ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_zgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_zgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, ap( kc ),1, cone, b( k,& + 1 ), ldb ) + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,ap( kc+k ), 1, cone, b( & + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZSTEIN: computes the eigenvectors of a real symmetric tridiagonal + !> matrix T corresponding to specified eigenvalues, using inverse + !> iteration. + !> The maximum number of iterations allowed for each eigenvector is + !> specified by an internal parameter MAXITS (currently set to 5). + !> Although the eigenvectors are real, they are stored in a complex + !> array, which may be passed to ZUNMTR or ZUPMTR for back + !> transformation to the eigenvectors of a complex Hermitian matrix + !> which was reduced to tridiagonal form. + + pure subroutine stdlib_zstein( n, d, e, m, w, iblock, isplit, z, ldz, work,iwork, ifail, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, m, n + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), isplit(*) + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(in) :: d(*), e(*), w(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: odm3 = 1.0e-3_dp + real(dp), parameter :: odm1 = 1.0e-1_dp + integer(ilp), parameter :: maxits = 5 + integer(ilp), parameter :: extra = 2 + + + + ! Local Scalars + integer(ilp) :: b1, blksiz, bn, gpind, i, iinfo, indrv1, indrv2, indrv3, indrv4, & + indrv5, its, j, j1, jblk, jmax, jr, nblk, nrmchk + real(dp) :: dtpcrt, eps, eps1, nrm, onenrm, ortol, pertol, scl, sep, tol, xj, xjm, & + ztr + ! Local Arrays + integer(ilp) :: iseed(4) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + do i = 1, m + ifail( i ) = 0 + end do + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -4 + else if( ldz1 ) then + eps1 = abs( eps*xj ) + pertol = ten*eps1 + sep = xj - xjm + if( sepmaxits )go to 120 + ! normalize and scale the righthand side vector pb. + jmax = stdlib_idamax( blksiz, work( indrv1+1 ), 1 ) + scl = blksiz*onenrm*max( eps,abs( work( indrv4+blksiz ) ) ) /abs( work( indrv1+& + jmax ) ) + call stdlib_dscal( blksiz, scl, work( indrv1+1 ), 1 ) + ! solve the system lu = pb. + call stdlib_dlagts( -1, blksiz, work( indrv4+1 ), work( indrv2+2 ),work( indrv3+& + 1 ), work( indrv5+1 ), iwork,work( indrv1+1 ), tol, iinfo ) + ! reorthogonalize by modified gram-schmidt if eigenvalues are + ! close enough. + if( jblk==1 )go to 110 + if( abs( xj-xjm )>ortol )gpind = j + if( gpind/=j ) then + do i = gpind, j - 1 + ztr = zero + do jr = 1, blksiz + ztr = ztr + work( indrv1+jr )*real( z( b1-1+jr, i ),KIND=dp) + end do + do jr = 1, blksiz + work( indrv1+jr ) = work( indrv1+jr ) -ztr*real( z( b1-1+jr, i ),& + KIND=dp) + end do + end do + end if + ! check the infinity norm of the iterate. + 110 continue + jmax = stdlib_idamax( blksiz, work( indrv1+1 ), 1 ) + nrm = abs( work( indrv1+jmax ) ) + ! continue for additional iterations after norm reaches + ! stopping criterion. + if( nrm ZSTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the implicit QL or QR method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + !> matrix to tridiagonal form. + + pure subroutine stdlib_zsteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 30 + + + + ! Local Scalars + integer(ilp) :: i, icompz, ii, iscale, j, jtot, k, l, l1, lend, lendm1, lendp1, lendsv,& + lm1, lsv, m, mm, mm1, nm1, nmaxit + real(dp) :: anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, s, safmax, safmin, ssfmax, & + ssfmin, tst + ! Intrinsic Functions + intrinsic :: abs,max,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldzn )go to 160 + if( l1>1 )e( l1-1 ) = zero + if( l1<=nm1 ) then + do m = l1, nm1 + tst = abs( e( m ) ) + if( tst==zero )go to 30 + if( tst<=( sqrt( abs( d( m ) ) )*sqrt( abs( d( m+1 ) ) ) )*eps ) then + e( m ) = zero + go to 30 + end if + end do + end if + m = n + 30 continue + l = l1 + lsv = l + lend = m + lendsv = lend + l1 = m + 1 + if( lend==l )go to 10 + ! scale submatrix in rows and columns l to lend + anorm = stdlib_dlanst( 'I', lend-l+1, d( l ), e( l ) ) + iscale = 0 + if( anorm==zero )go to 10 + if( anorm>ssfmax ) then + iscale = 1 + call stdlib_dlascl( 'G', 0, 0, anorm, ssfmax, lend-l+1, 1, d( l ), n,info ) + call stdlib_dlascl( 'G', 0, 0, anorm, ssfmax, lend-l, 1, e( l ), n,info ) + else if( anorml ) then + ! ql iteration + ! look for small subdiagonal element. + 40 continue + if( l/=lend ) then + lendm1 = lend - 1 + do m = l, lendm1 + tst = abs( e( m ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m+1 ) )+safmin )go to 60 + end do + end if + m = lend + 60 continue + if( m0 ) then + call stdlib_dlaev2( d( l ), e( l ), d( l+1 ), rt1, rt2, c, s ) + work( l ) = c + work( n-1+l ) = s + call stdlib_zlasr( 'R', 'V', 'B', n, 2, work( l ),work( n-1+l ), z( 1, l ), & + ldz ) + else + call stdlib_dlae2( d( l ), e( l ), d( l+1 ), rt1, rt2 ) + end if + d( l ) = rt1 + d( l+1 ) = rt2 + e( l ) = zero + l = l + 2 + if( l<=lend )go to 40 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l+1 )-p ) / ( two*e( l ) ) + r = stdlib_dlapy2( g, one ) + g = d( m ) - p + ( e( l ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + mm1 = m - 1 + do i = mm1, l, -1 + f = s*e( i ) + b = c*e( i ) + call stdlib_dlartg( g, f, c, s, r ) + if( i/=m-1 )e( i+1 ) = r + g = d( i+1 ) - p + r = ( d( i )-g )*s + two*c*b + p = s*r + d( i+1 ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = -s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = m - l + 1 + call stdlib_zlasr( 'R', 'V', 'B', n, mm, work( l ), work( n-1+l ),z( 1, l ), ldz & + ) + end if + d( l ) = d( l ) - p + e( l ) = g + go to 40 + ! eigenvalue found. + 80 continue + d( l ) = p + l = l + 1 + if( l<=lend )go to 40 + go to 140 + else + ! qr iteration + ! look for small superdiagonal element. + 90 continue + if( l/=lend ) then + lendp1 = lend + 1 + do m = l, lendp1, -1 + tst = abs( e( m-1 ) )**2 + if( tst<=( eps2*abs( d( m ) ) )*abs( d( m-1 ) )+safmin )go to 110 + end do + end if + m = lend + 110 continue + if( m>lend )e( m-1 ) = zero + p = d( l ) + if( m==l )go to 130 + ! if remaining matrix is 2-by-2, use stdlib_dlae2 or stdlib_slaev2 + ! to compute its eigensystem. + if( m==l-1 ) then + if( icompz>0 ) then + call stdlib_dlaev2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2, c, s ) + work( m ) = c + work( n-1+m ) = s + call stdlib_zlasr( 'R', 'V', 'F', n, 2, work( m ),work( n-1+m ), z( 1, l-1 ), & + ldz ) + else + call stdlib_dlae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) + end if + d( l-1 ) = rt1 + d( l ) = rt2 + e( l-1 ) = zero + l = l - 2 + if( l>=lend )go to 90 + go to 140 + end if + if( jtot==nmaxit )go to 140 + jtot = jtot + 1 + ! form shift. + g = ( d( l-1 )-p ) / ( two*e( l-1 ) ) + r = stdlib_dlapy2( g, one ) + g = d( m ) - p + ( e( l-1 ) / ( g+sign( r, g ) ) ) + s = one + c = one + p = zero + ! inner loop + lm1 = l - 1 + do i = m, lm1 + f = s*e( i ) + b = c*e( i ) + call stdlib_dlartg( g, f, c, s, r ) + if( i/=m )e( i-1 ) = r + g = d( i ) - p + r = ( d( i+1 )-g )*s + two*c*b + p = s*r + d( i ) = g + p + g = c*r - b + ! if eigenvectors are desired, then save rotations. + if( icompz>0 ) then + work( i ) = c + work( n-1+i ) = s + end if + end do + ! if eigenvectors are desired, then apply saved rotations. + if( icompz>0 ) then + mm = l - m + 1 + call stdlib_zlasr( 'R', 'V', 'F', n, mm, work( m ), work( n-1+m ),z( 1, m ), ldz & + ) + end if + d( l ) = d( l ) - p + e( lm1 ) = g + go to 90 + ! eigenvalue found. + 130 continue + d( l ) = p + l = l - 1 + if( l>=lend )go to 90 + go to 140 + end if + ! undo scaling if necessary + 140 continue + if( iscale==1 ) then + call stdlib_dlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_dlascl( 'G', 0, 0, ssfmax, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + else if( iscale==2 ) then + call stdlib_dlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv+1, 1,d( lsv ), n, info ) + + call stdlib_dlascl( 'G', 0, 0, ssfmin, anorm, lendsv-lsv, 1, e( lsv ),n, info ) + + end if + ! check for no convergence to an eigenvalue after a total + ! of n*maxit iterations. + if( jtot==nmaxit ) then + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + return + end if + go to 10 + ! order eigenvalues and eigenvectors. + 160 continue + if( icompz==0 ) then + ! use quick sort + call stdlib_dlasrt( 'I', n, d, info ) + else + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

ZSYCONV: converts A given by ZHETRF into L and D or vice-versa. + !> Get nondiagonal elements of D (returned in workspace) and + !> apply or reverse permutation done in TRF. + + pure subroutine stdlib_zsyconv( uplo, way, n, a, lda, ipiv, e, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, j + complex(dp) :: temp + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda 1 ) + if( ipiv(i) < 0 ) then + e(i)=a(i-1,i) + e(i-1)=czero + a(i-1,i)=czero + i=i-1 + else + e(i)=czero + endif + i=i-1 + end do + ! convert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + i=i-1 + endif + i=i-1 + end do + else + ! revert a (a is upper) + ! revert permutations + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i+1 + if( i < n) then + do j= i+1,n + temp=a(ip,j) + a(ip,j)=a(i-1,j) + a(i-1,j)=temp + end do + endif + endif + i=i+1 + end do + ! revert value + i=n + do while ( i > 1 ) + if( ipiv(i) < 0 ) then + a(i-1,i)=e(i) + i=i-1 + endif + i=i-1 + end do + end if + else + ! a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + i=1 + e(n)=czero + do while ( i <= n ) + if( i 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i,j) + a(i,j)=temp + end do + endif + else + ip=-ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(ip,j) + a(ip,j)=a(i+1,j) + a(i+1,j)=temp + end do + endif + i=i+1 + endif + i=i+1 + end do + else + ! revert a (a is lower) + ! revert permutations + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + ip=ipiv(i) + if (i > 1) then + do j= 1,i-1 + temp=a(i,j) + a(i,j)=a(ip,j) + a(ip,j)=temp + end do + endif + else + ip=-ipiv(i) + i=i-1 + if (i > 1) then + do j= 1,i-1 + temp=a(i+1,j) + a(i+1,j)=a(ip,j) + a(ip,j)=temp + end do + endif + endif + i=i-1 + end do + ! revert value + i=1 + do while ( i <= n-1 ) + if( ipiv(i) < 0 ) then + a(i+1,i)=e(i) + i=i+1 + endif + i=i+1 + end do + end if + end if + return + end subroutine stdlib_zsyconv + + !> If parameter WAY = 'C': + !> ZSYCONVF: converts the factorization output format used in + !> ZSYTRF provided on entry in parameter A into the factorization + !> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored + !> on exit in parameters A and E. It also converts in place details of + !> the intechanges stored in IPIV from the format used in ZSYTRF into + !> the format used in ZSYTRF_RK (or ZSYTRF_BK). + !> If parameter WAY = 'R': + !> ZSYCONVF performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in ZSYTRF_RK + !> (or ZSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in ZSYTRF that is stored + !> on exit in parameter A. It also converts in place details of + !> the intechanges stored in IPIV from the format used in ZSYTRF_RK + !> (or ZSYTRF_BK) into the format used in ZSYTRF. + !> ZSYCONVF can also convert in Hermitian matrix case, i.e. between + !> formats used in ZHETRF and ZHETRF_RK (or ZHETRF_BK). + + pure subroutine stdlib_zsyconvf( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations and ipiv + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_zswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_zswap( i-1, a( i+1, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is no interchnge of rows i and and ipiv(i), + ! so this should be reflected in ipiv format for + ! *sytrf_rk ( or *sytrf_bk) + ipiv( i ) = i + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations and ipiv + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_zswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + if ( i>1 ) then + if( ip/=(i+1) ) then + call stdlib_zswap( i-1, a( ip, 1 ), lda,a( i+1, 1 ), lda ) + end if + end if + ! convert ipiv + ! there is cone interchange of rows i+1 and ipiv(i+1), + ! so this should be recorded in consecutive entries + ! in ipiv format for *sytrf + ipiv( i ) = ipiv( i+1 ) + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_zsyconvf + + !> If parameter WAY = 'C': + !> ZSYCONVF_ROOK: converts the factorization output format used in + !> ZSYTRF_ROOK provided on entry in parameter A into the factorization + !> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored + !> on exit in parameters A and E. IPIV format for ZSYTRF_ROOK and + !> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. + !> If parameter WAY = 'R': + !> ZSYCONVF_ROOK performs the conversion in reverse direction, i.e. + !> converts the factorization output format used in ZSYTRF_RK + !> (or ZSYTRF_BK) provided on entry in parameters A and E into + !> the factorization output format used in ZSYTRF_ROOK that is stored + !> on exit in parameter A. IPIV format for ZSYTRF_ROOK and + !> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. + !> ZSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between + !> formats used in ZHETRF_ROOK and ZHETRF_RK (or ZHETRF_BK). + + pure subroutine stdlib_zsyconvf_rook( uplo, way, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, way + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), e(*) + ! ===================================================================== + + ! External Subroutines + logical(lk) :: upper, convert + integer(ilp) :: i, ip, ip2 + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + convert = stdlib_lsame( way, 'C' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.convert .and. .not.stdlib_lsame( way, 'R' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda1 ) + if( ipiv( i )<0 ) then + e( i ) = a( i-1, i ) + e( i-1 ) = czero + a( i-1, i ) = czero + i = i - 1 + else + e( i ) = czero + end if + i = i - 1 + end do + ! convert permutations + ! apply permutations to submatrices of upper part of a + ! in factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(1:i,n-i:n) + ip = ipiv( i ) + if( i1 ) + if( ipiv( i )<0 ) then + a( i-1, i ) = e( i ) + i = i - 1 + end if + i = i - 1 + end do + ! end a is upper + end if + else + ! begin a is lower + if ( convert ) then + ! convert a (a is lower) + ! convert value + ! assign subdiagonal entries of d to array e and czero out + ! corresponding entries in input storage a + i = 1 + e( n ) = czero + do while ( i<=n ) + if( i0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_zswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i and ipiv(i) and i+1 and ipiv(i+1) + ! in a(i:n,1:i-1) + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_zswap( i-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + if( ip2/=(i+1) ) then + call stdlib_zswap( i-1, a( i+1, 1 ), lda,a( ip2, 1 ), lda ) + end if + end if + i = i + 1 + end if + i = i + 1 + end do + else + ! revert a (a is lower) + ! revert permutations + ! apply permutations to submatrices of lower part of a + ! in reverse factorization order where i decreases from n to 1 + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + ! 1-by-1 pivot interchange + ! swap rows i and ipiv(i) in a(i:n,1:i-1) + ip = ipiv( i ) + if ( i>1 ) then + if( ip/=i ) then + call stdlib_zswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + else + ! 2-by-2 pivot interchange + ! swap rows i+1 and ipiv(i+1) and i and ipiv(i) + ! in a(i:n,1:i-1) + i = i - 1 + ip = -ipiv( i ) + ip2 = -ipiv( i+1 ) + if ( i>1 ) then + if( ip2/=(i+1) ) then + call stdlib_zswap( i-1, a( ip2, 1 ), lda,a( i+1, 1 ), lda ) + end if + if( ip/=i ) then + call stdlib_zswap( i-1, a( ip, 1 ), lda,a( i, 1 ), lda ) + end if + end if + end if + i = i - 1 + end do + ! revert value + ! assign subdiagonal entries of d from array e to + ! subgiagonal entries of a. + i = 1 + do while ( i<=n-1 ) + if( ipiv( i )<0 ) then + a( i + 1, i ) = e( i ) + i = i + 1 + end if + i = i + 1 + end do + end if + ! end a is lower + end if + return + end subroutine stdlib_zsyconvf_rook + + !> ZSYEQUB: computes row and column scalings intended to equilibrate a + !> symmetric matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_zsyequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(dp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'ZSYEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_dp / s( j ) + end do + tol = one / sqrt( 2.0_dp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + s( i ) * real( work( i ),KIND=dp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_zlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( real( work( i ),KIND=dp) - t*si ) + c0 = -(t*si)*si + 2 * real( work( i ),KIND=dp) * si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + real( work( i ),KIND=dp) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_dlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_dlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_zsyequb + + !> ZSYMV: performs the matrix-vector operation + !> y := alpha*A*x + beta*y, + !> where alpha and beta are scalars, x and y are n element vectors and + !> A is an n by n symmetric matrix. + + pure subroutine stdlib_zsymv( uplo, n, alpha, a, lda, x, incx, beta, y, incy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, incy, lda, n + complex(dp), intent(in) :: alpha, beta + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), x(*) + complex(dp), intent(inout) :: y(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky + complex(dp) :: temp1, temp2 + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( lda0 ) then + kx = 1 + else + kx = 1 - ( n-1 )*incx + end if + if( incy>0 ) then + ky = 1 + else + ky = 1 - ( n-1 )*incy + end if + ! start the operations. in this version the elements of a are + ! accessed sequentially with cone pass through the triangular part + ! of a. + ! first form y := beta*y. + if( beta/=cone ) then + if( incy==1 ) then + if( beta==czero ) then + do i = 1, n + y( i ) = czero + end do + else + do i = 1, n + y( i ) = beta*y( i ) + end do + end if + else + iy = ky + if( beta==czero ) then + do i = 1, n + y( iy ) = czero + iy = iy + incy + end do + else + do i = 1, n + y( iy ) = beta*y( iy ) + iy = iy + incy + end do + end if + end if + end if + if( alpha==czero )return + if( stdlib_lsame( uplo, 'U' ) ) then + ! form y when a is stored in upper triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + do i = 1, j - 1 + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + temp1*a( j, j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + ix = kx + iy = ky + do i = 1, j - 1 + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + ix = ix + incx + iy = iy + incy + end do + y( jy ) = y( jy ) + temp1*a( j, j ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + else + ! form y when a is stored in lower triangle. + if( ( incx==1 ) .and. ( incy==1 ) ) then + do j = 1, n + temp1 = alpha*x( j ) + temp2 = czero + y( j ) = y( j ) + temp1*a( j, j ) + do i = j + 1, n + y( i ) = y( i ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( i ) + end do + y( j ) = y( j ) + alpha*temp2 + end do + else + jx = kx + jy = ky + do j = 1, n + temp1 = alpha*x( jx ) + temp2 = czero + y( jy ) = y( jy ) + temp1*a( j, j ) + ix = jx + iy = jy + do i = j + 1, n + ix = ix + incx + iy = iy + incy + y( iy ) = y( iy ) + temp1*a( i, j ) + temp2 = temp2 + a( i, j )*x( ix ) + end do + y( jy ) = y( jy ) + alpha*temp2 + jx = jx + incx + jy = jy + incy + end do + end if + end if + return + end subroutine stdlib_zsymv + + !> ZSYR: performs the symmetric rank 1 operation + !> A := alpha*x*x**H + A, + !> where alpha is a complex scalar, x is an n element vector and A is an + !> n by n symmetric matrix. + + pure subroutine stdlib_zsyr( uplo, n, alpha, x, incx, a, lda ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: incx, lda, n + complex(dp), intent(in) :: alpha + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: x(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, info, ix, j, jx, kx + complex(dp) :: temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = 1 + else if( n<0 ) then + info = 2 + else if( incx==0 ) then + info = 5 + else if( lda ZSYSWAPR: applies an elementary permutation on the rows and the columns of + !> a symmetric matrix. + + pure subroutine stdlib_zsyswapr( uplo, n, a, lda, i1, i2) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: i1, i2, lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,n) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(dp) :: tmp + ! Executable Statements + upper = stdlib_lsame( uplo, 'U' ) + if (upper) then + ! upper + ! first swap + ! - swap column i1 and i2 from i1 to i1-1 + call stdlib_zswap( i1-1, a(1,i1), 1, a(1,i2), 1 ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1,i1+i) + a(i1,i1+i)=a(i1+i,i2) + a(i1+i,i2)=tmp + end do + ! third swap + ! - swap row i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i1,i) + a(i1,i)=a(i2,i) + a(i2,i)=tmp + end do + else + ! lower + ! first swap + ! - swap row i1 and i2 from i1 to i1-1 + call stdlib_zswap( i1-1, a(i1,1), lda, a(i2,1), lda ) + ! second swap : + ! - swap a(i1,i1) and a(i2,i2) + ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1 + tmp=a(i1,i1) + a(i1,i1)=a(i2,i2) + a(i2,i2)=tmp + do i=1,i2-i1-1 + tmp=a(i1+i,i1) + a(i1+i,i1)=a(i2,i1+i) + a(i2,i1+i)=tmp + end do + ! third swap + ! - swap col i1 and i2 from i2+1 to n + do i=i2+1,n + tmp=a(i,i1) + a(i,i1)=a(i,i2) + a(i,i2)=tmp + end do + endif + end subroutine stdlib_zsyswapr + + !> ZSYTF2: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zsytf2( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep + real(dp) :: absakk, alpha, colmax, rowmax + complex(dp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( max( absakk, colmax )==zero .or. stdlib_disnan(absakk) ) then + ! column k is zero or underflow, or contains a nan: + ! set info and continue + if( info==0 )info = k + kp = k + else + if( absakk>=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = imax + stdlib_izamax( k-imax, a( imax, imax+1 ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax>1 ) then + jmax = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + rowmax = max( rowmax, cabs1( a( jmax, imax ) ) ) + end if + if( absakk>=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k-1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + call stdlib_zswap( kk-kp-1, a( kp+1, kk ), 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t + r1 = cone / a( k, k ) + call stdlib_zsyr( uplo, k-1, -r1, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zscal( k-1, r1, a( 1, k ), 1 ) + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + d12 = t / d12 + do j = k - 2, 1, -1 + wkm1 = d12*( d11*a( j, k-1 )-a( j, k ) ) + wk = d12*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - a( i, k )*wk -a( i, k-1 )*wkm1 + end do + a( j, k ) = wk + a( j, k-1 ) = wkm1 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -kp + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else + ! jmax is the column-index of the largest off-diagonal + ! element in row imax, and rowmax is its absolute value + jmax = k - 1 + stdlib_izamax( imax-k, a( imax, k ), lda ) + rowmax = cabs1( a( imax, jmax ) ) + if( imax=alpha*colmax*( colmax / rowmax ) ) then + ! no interchange, use 1-by-1 pivot block + kp = k + else if( cabs1( a( imax, imax ) )>=alpha*rowmax ) then + ! interchange rows and columns k and imax, use 1-by-1 + ! pivot block + kp = imax + else + ! interchange rows and columns k+1 and imax, use 2-by-2 + ! pivot block + kp = imax + kstep = 2 + end if + end if + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp ZSYTF2_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_zsytf2_rk( uplo, n, a, lda, e, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(dp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + ! set e( k ) to zero + if( k>1 )e( k ) = czero + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )1 )call stdlib_zswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_zswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 )call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_zswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert upper triangle of a into u form by applying + ! the interchanges in columns k+1:n. + if( k1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_zsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + ! store the superdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + ! copy superdiagonal elements of d(k) to e(k) and + ! zero out superdiagonal entry of a + e( k ) = a( k-1, k ) + e( k-1 ) = czero + a( k-1, k ) = czero + end if + ! end column k is nonsingular + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + 34 continue + else + ! factorize a as l*d*l**t using the lower triangle of a + ! initialize the unused last entry of the subdiagonal array e. + e( n ) = czero + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 64 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! abs( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1( a( imax, imax ) )(k+1) )call stdlib_zswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_zswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda ) + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_zswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + ! convert lower triangle of a into l form by applying + ! the interchanges in columns 1:k-1. + if ( k>1 )call stdlib_zswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda ) + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_zsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_zscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + ! store the subdiagonal element of d in array e + e( k ) = czero + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZSYTF2_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, U**T is the transpose of U, and D is symmetric and + !> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the unblocked version of the algorithm, calling Level 2 BLAS. + + pure subroutine stdlib_zsytf2_rook( uplo, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: sevten = 17.0e+0_dp + + + + ! Local Scalars + logical(lk) :: upper, done + integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii + real(dp) :: absakk, alpha, colmax, rowmax, dtemp, sfmin + complex(dp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z + ! Intrinsic Functions + intrinsic :: abs,max,sqrt,aimag,real + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + imax = stdlib_izamax( k-1, a( 1, k ), 1 ) + colmax = cabs1( a( imax, k ) ) + else + colmax = zero + end if + if( (max( absakk, colmax )==zero) ) then + ! column k is zero or underflow: set info and continue + if( info==0 )info = k + kp = k + else + ! test for interchange + ! equivalent to testing for (used to handle nan and inf) + ! absakk>=alpha*colmax + if( .not.( absakk1 ) then + itemp = stdlib_izamax( imax-1, a( 1, imax ), 1 ) + dtemp = cabs1( a( itemp, imax ) ) + if( dtemp>rowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))1 )call stdlib_zswap( p-1, a( 1, k ), 1, a( 1, p ), 1 ) + if( p<(k-1) )call stdlib_zswap( k-p-1, a( p+1, k ), 1, a( p, p+1 ),lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k - kstep + 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the leading + ! submatrix a(1:k,1:k) + if( kp>1 )call stdlib_zswap( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 ) + if( ( kk>1 ) .and. ( kp<(kk-1) ) )call stdlib_zswap( kk-kp-1, a( kp+1, kk ), & + 1, a( kp, kp+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k-1, k ) + a( k-1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the leading submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = u(k)*d(k) + ! where u(k) is the k-th column of u + if( k>1 ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) and + ! store u(k) in column k + if( cabs1( a( k, k ) )>=sfmin ) then + ! perform a rank-1 update of a(1:k-1,1:k-1) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*1/d(k)*w(k)**t + d11 = cone / a( k, k ) + call stdlib_zsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + ! store u(k) in column k + call stdlib_zscal( k-1, d11, a( 1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = 1, k - 1 + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - u(k)*d(k)*u(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zsyr( uplo, k-1, -d11, a( 1, k ), 1, a, lda ) + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k-1 now hold + ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k) + ! where u(k) and u(k-1) are the k-th and (k-1)-th columns + ! of u + ! perform a rank-2 update of a(1:k-2,1:k-2) as + ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t + ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k>2 ) then + d12 = a( k-1, k ) + d22 = a( k-1, k-1 ) / d12 + d11 = a( k, k ) / d12 + t = cone / ( d11*d22-cone ) + do j = k - 2, 1, -1 + wkm1 = t*( d11*a( j, k-1 )-a( j, k ) ) + wk = t*( d22*a( j, k )-a( j, k-1 ) ) + do i = j, 1, -1 + a( i, j ) = a( i, j ) - (a( i, k ) / d12 )*wk -( a( i, k-1 ) / d12 )& + *wkm1 + end do + ! store u(k) and u(k-1) in cols k and k-1 for row j + a( j, k ) = wk / d12 + a( j, k-1 ) = wkm1 / d12 + end do + end if + end if + end if + ! store details of the interchanges in ipiv + if( kstep==1 ) then + ipiv( k ) = kp + else + ipiv( k ) = -p + ipiv( k-1 ) = -kp + end if + ! decrease k and return to the start of the main loop + k = k - kstep + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2 + k = 1 + 40 continue + ! if k > n, exit from loop + if( k>n )go to 70 + kstep = 1 + p = k + ! determine rows and columns to be interchanged and whether + ! a 1-by-1 or 2-by-2 pivot block will be used + absakk = cabs1( a( k, k ) ) + ! imax is the row-index of the largest off-diagonal element in + ! column k, and colmax is its absolute value. + ! determine both colmax and imax. + if( k=alpha*colmax + if( .not.( absakkrowmax ) then + rowmax = dtemp + jmax = itemp + end if + end if + ! equivalent to testing for (used to handle nan and inf) + ! cabs1( a( imax, imax ) )>=alpha*rowmax + if( .not.( cabs1(a( imax, imax ))(k+1) )call stdlib_zswap( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda ) + + t = a( k, k ) + a( k, k ) = a( p, p ) + a( p, p ) = t + end if + ! second swap + kk = k + kstep - 1 + if( kp/=kk ) then + ! interchange rows and columns kk and kp in the trailing + ! submatrix a(k:n,k:n) + if( kp(kk+1) ) )call stdlib_zswap( kp-kk-1, a( kk+1, kk ), & + 1, a( kp, kk+1 ),lda ) + t = a( kk, kk ) + a( kk, kk ) = a( kp, kp ) + a( kp, kp ) = t + if( kstep==2 ) then + t = a( k+1, k ) + a( k+1, k ) = a( kp, k ) + a( kp, k ) = t + end if + end if + ! update the trailing submatrix + if( kstep==1 ) then + ! 1-by-1 pivot block d(k): column k now holds + ! w(k) = l(k)*d(k) + ! where l(k) is the k-th column of l + if( k=sfmin ) then + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + d11 = cone / a( k, k ) + call stdlib_zsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + ! store l(k) in column k + call stdlib_zscal( n-k, d11, a( k+1, k ), 1 ) + else + ! store l(k) in column k + d11 = a( k, k ) + do ii = k + 1, n + a( ii, k ) = a( ii, k ) / d11 + end do + ! perform a rank-1 update of a(k+1:n,k+1:n) as + ! a := a - l(k)*d(k)*l(k)**t + ! = a - w(k)*(1/d(k))*w(k)**t + ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t + call stdlib_zsyr( uplo, n-k, -d11, a( k+1, k ), 1,a( k+1, k+1 ), lda ) + + end if + end if + else + ! 2-by-2 pivot block d(k): columns k and k+1 now hold + ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k) + ! where l(k) and l(k+1) are the k-th and (k+1)-th columns + ! of l + ! perform a rank-2 update of a(k+2:n,k+2:n) as + ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t + ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t + ! and store l(k) and l(k+1) in columns k and k+1 + if( k ZSYTRF: computes the factorization of a complex symmetric matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zsytrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlasyf( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zsytf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_zlasyf; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlasyf( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zsytf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zsytrf + + !> ZSYTRF_RK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_zsytrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlasyf_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zsytf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlasyf_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zsytf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_zswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zsytrf_rk + + !> ZSYTRF_ROOK: computes the factorization of a complex symmetric matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zsytrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlasyf_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zsytf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_zlasyf_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlasyf_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zsytf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zsytrf_rook + + !> ZSYTRI: computes the inverse of a complex symmetric indefinite matrix + !> A using the factorization A = U*D*U**T or A = L*D*L**T computed by + !> ZSYTRF. + + pure subroutine stdlib_zsytri( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_zdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_zdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_zdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_zcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_zdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + kp = abs( ipiv( k ) ) + if( kp/=k ) then + ! interchange rows and columns k and kp in the leading + ! submatrix a(1:k+1,1:k+1) + call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_zswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + if( kstep==2 ) then + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + end if + k = k + kstep + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k ZSYTRI_ROOK: computes the inverse of a complex symmetric + !> matrix A using the factorization A = U*D*U**T or A = L*D*L**T + !> computed by ZSYTRF_ROOK. + + pure subroutine stdlib_zsytri_rook( uplo, n, a, lda, ipiv, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kp, kstep + complex(dp) :: ak, akkp1, akp1, d, t, temp + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( info, info )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do info = 1, n + if( ipiv( info )>0 .and. a( info, info )==czero )return + end do + end if + info = 0 + if( upper ) then + ! compute inv(a) from the factorization a = u*d*u**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 30 continue + ! if k > n, exit from loop. + if( k>n )go to 40 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_zdotu( k-1, work, 1, a( 1, k ),1 ) + end if + kstep = 1 + else + ! 2 x 2 diagonal block + ! invert the diagonal block. + t = a( k, k+1 ) + ak = a( k, k ) / t + akp1 = a( k+1, k+1 ) / t + akkp1 = a( k, k+1 ) / t + d = t*( ak*akp1-cone ) + a( k, k ) = akp1 / d + a( k+1, k+1 ) = ak / d + a( k, k+1 ) = -akkp1 / d + ! compute columns k and k+1 of the inverse. + if( k>1 ) then + call stdlib_zcopy( k-1, a( 1, k ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k ), 1 ) + + a( k, k ) = a( k, k ) - stdlib_zdotu( k-1, work, 1, a( 1, k ),1 ) + a( k, k+1 ) = a( k, k+1 ) -stdlib_zdotu( k-1, a( 1, k ), 1, a( 1, k+1 ), 1 ) + + call stdlib_zcopy( k-1, a( 1, k+1 ), 1, work, 1 ) + call stdlib_zsymv( uplo, k-1, -cone, a, lda, work, 1, czero,a( 1, k+1 ), 1 ) + + a( k+1, k+1 ) = a( k+1, k+1 ) -stdlib_zdotu( k-1, work, 1, a( 1, k+1 ), 1 ) + + end if + kstep = 2 + end if + if( kstep==1 ) then + ! interchange rows and columns k and ipiv(k) in the leading + ! submatrix a(1:k+1,1:k+1) + kp = ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_zswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + else + ! interchange rows and columns k and k+1 with -ipiv(k) and + ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1) + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_zswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + temp = a( k, k+1 ) + a( k, k+1 ) = a( kp, k+1 ) + a( kp, k+1 ) = temp + end if + k = k + 1 + kp = -ipiv( k ) + if( kp/=k ) then + if( kp>1 )call stdlib_zswap( kp-1, a( 1, k ), 1, a( 1, kp ), 1 ) + call stdlib_zswap( k-kp-1, a( kp+1, k ), 1, a( kp, kp+1 ), lda ) + temp = a( k, k ) + a( k, k ) = a( kp, kp ) + a( kp, kp ) = temp + end if + end if + k = k + 1 + go to 30 + 40 continue + else + ! compute inv(a) from the factorization a = l*d*l**t. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = n + 50 continue + ! if k < 1, exit from loop. + if( k<1 )go to 60 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! invert the diagonal block. + a( k, k ) = cone / a( k, k ) + ! compute column k of the inverse. + if( k ZSYTRS: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF. + + pure subroutine stdlib_zsytrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_zscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb, a( 1, k ),1, cone, b( & + k, 1 ), ldb ) + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b, ldb,a( 1, k+1 ), 1, cone, b(& + k+1, 1 ), ldb ) + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZSYTRS2: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF and converted by ZSYCONV. + + pure subroutine stdlib_zsytrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_ztrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + call stdlib_zscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_ztrsm('L','U','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_zswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ztrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + call stdlib_zscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / akm1k + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / akm1k + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**t \ b) -> b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_ztrsm('L','L','T','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_zsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_zsytrs2 + + !> ZSYTRS_3: solves a system of linear equations A * X = B with a complex + !> symmetric matrix A using the factorization computed + !> by ZSYTRF_RK or ZSYTRF_BK: + !> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This algorithm is using Level 3 BLAS. + + pure subroutine stdlib_zsytrs_3( uplo, n, nrhs, a, lda, e, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), e(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j, k, kp + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda b [ (u \p**t * b) ] + call stdlib_ztrsm( 'L', 'U', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i = n + do while ( i>=1 ) + if( ipiv( i )>0 ) then + call stdlib_zscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if ( i>1 ) then + akm1k = e( i ) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / akm1k + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + end if + i = i - 1 + end do + ! compute (u**t \ b) -> b [ u**t \ (d \ (u \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'U', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (u**t \ (d \ (u \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for upper case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + else + ! begin lower + ! solve a*x = b, where a = l*d*l**t. + ! p**t * b + ! interchange rows k and ipiv(k) of matrix b in the same order + ! that the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = 1, n, 1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i = 1 + do while ( i<=n ) + if( ipiv( i )>0 ) then + call stdlib_zscal( nrhs, cone / a( i, i ), b( i, 1 ), ldb ) + else if( i b [ l**t \ (d \ (l \p**t * b) ) ] + call stdlib_ztrsm('L', 'L', 'T', 'U', n, nrhs, cone, a, lda, b, ldb ) + ! p * b [ p * (l**t \ (d \ (l \p**t * b) )) ] + ! interchange rows k and ipiv(k) of matrix b in reverse order + ! from the formation order of ipiv(i) vector for lower case. + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv(i) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + do k = n, 1, -1 + kp = abs( ipiv( k ) ) + if( kp/=k ) then + call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end if + end do + ! end lower + end if + return + end subroutine stdlib_zsytrs_3 + + !> ZSYTRS_AA: solves a system of linear equations A*X = B with a complex + !> symmetric matrix A using the factorization A = U**T*T*U or + !> A = L*T*L**T computed by ZSYTRF_AA. + + pure subroutine stdlib_zsytrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**t \ b -> b [ (u**t \p**t * b) ] + call stdlib_ztrsm( 'L', 'U', 'T', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**t \p**t * b) ] + call stdlib_zlacpy( 'F', 1, n, a( 1, 1 ), lda+1, work( n ), 1) + if( n>1 ) then + call stdlib_zlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_zlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_zgtsv( n, nrhs, work( 1 ), work( n ), work( 2*n ), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**t \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (u \ (t \ (u**t \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**t. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_zlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_zlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1 ) + call stdlib_zlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1 ) + end if + call stdlib_zgtsv( n, nrhs, work( 1 ), work(n), work( 2*n ), b, ldb,info) + ! 3) backward substitution with l**t + if( n>1 ) then + ! compute (l**t \ b) -> b [ l**t \ (t \ (l \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'L', 'T', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b -> b [ p * (l**t \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_zsytrs_aa + + !> ZSYTRS_ROOK: solves a system of linear equations A*X = B with + !> a complex symmetric matrix A using the factorization A = U*D*U**T or + !> A = L*D*L**T computed by ZSYTRF_ROOK. + + pure subroutine stdlib_zsytrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + call stdlib_zscal( nrhs, cone / a( k, k ), b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1 ) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + if( k>2 ) then + call stdlib_zgeru( k-2, nrhs,-cone, a( 1, k ), 1, b( k, 1 ),ldb, b( 1, 1 ), & + ldb ) + call stdlib_zgeru( k-2, nrhs,-cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 )& + , ldb ) + end if + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / akm1k + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**t *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**t(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 )call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, & + cone, b( k, 1 ), ldb ) + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), 1, cone, & + b( k, 1 ), ldb ) + call stdlib_zgemv( 'TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 ), 1, cone,& + b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1). + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_zswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**t. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**t(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZTBRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular band + !> coefficient matrix. + !> The solution matrix X must be computed by ZTBTRS or some other + !> means before entering this routine. ZTBRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ztbrfs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, x, ldx, ferr,& + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: ab(ldab,*), b(ldb,*), x(ldx,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ztbsv( uplo, transt, diag, n, kd, ab, ldab, work,1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ztbsv( uplo, transn, diag, n, kd, ab, ldab, work,1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ztbrfs + + !> ZTBTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular band matrix of order N, and B is an + !> N-by-NRHS matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_ztbtrs( uplo, trans, diag, n, kd, nrhs, ab, ldab, b,ldb, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab Level 3 BLAS like routine for A in RFP Format. + !> ZTFSM: solves the matrix equation + !> op( A )*X = alpha*B or X*op( A ) = alpha*B + !> where alpha is a scalar, X and B are m by n matrices, A is a unit, or + !> non-unit, upper or lower triangular matrix and op( A ) is one of + !> op( A ) = A or op( A ) = A**H. + !> A is in Rectangular Full Packed (RFP) Format. + !> The matrix X is overwritten on B. + + pure subroutine stdlib_ztfsm( transr, side, uplo, trans, diag, m, n, alpha, a,b, ldb ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, diag, side, trans, uplo + integer(ilp), intent(in) :: ldb, m, n + complex(dp), intent(in) :: alpha + ! Array Arguments + complex(dp), intent(in) :: a(0:*) + complex(dp), intent(inout) :: b(0:ldb-1,0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans + integer(ilp) :: m1, m2, n1, n2, k, info, i, j + ! Intrinsic Functions + intrinsic :: max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lside = stdlib_lsame( side, 'L' ) + lower = stdlib_lsame( uplo, 'L' ) + notrans = stdlib_lsame( trans, 'N' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lside .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -3 + else if( .not.notrans .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -4 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -5 + else if( m<0 ) then + info = -6 + else if( n<0 ) then + info = -7 + else if( ldb ZTFTTP: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard packed format (TP). + + pure subroutine stdlib_ztfttp( transr, uplo, n, arf, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(out) :: ap(0:*) + complex(dp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg + ! Intrinsic Functions + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTFTTP', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + ap( 0 ) = arf( 0 ) + else + ap( 0 ) = conjg( arf( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + ap( ijp ) = arf( ij ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + ap( ijp ) = conjg( arf( ij ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_ztfttp + + !> ZTFTTR: copies a triangular matrix A from rectangular full packed + !> format (TF) to standard full format (TR). + + pure subroutine stdlib_ztfttr( transr, uplo, n, arf, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(dp), intent(out) :: a(0:lda-1,0:*) + complex(dp), intent(in) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt, nx2, np1x2 + integer(ilp) :: i, j, l, ij + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + a( n2+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + a( j-n1, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + a( i, n1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + a( n2+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + a( k+j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = j, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = j - k, k - 1 + a( j-k, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + a( i, k+1+j ) = arf( ij ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + a( j, i ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + a( k+1+j, l ) = conjg( arf( ij ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + a( i, j ) = arf( ij ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_ztfttr + + !> ZTGEVC: computes some or all of the right and/or left eigenvectors of + !> a pair of complex matrices (S,P), where S and P are upper triangular. + !> Matrix pairs of this type are produced by the generalized Schur + !> factorization of a complex matrix pair (A,B): + !> A = Q*S*Z**H, B = Q*P*Z**H + !> as computed by ZGGHRD + ZHGEQZ. + !> The right eigenvector x and the left eigenvector y of (S,P) + !> corresponding to an eigenvalue w are defined by: + !> S*x = w*P*x, (y**H)*S = w*(y**H)*P, + !> where y**H denotes the conjugate tranpose of y. + !> The eigenvalues are not input to this routine, but are computed + !> directly from the diagonal elements of S and P. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of (S,P), or the products Z*X and/or Q*Y, + !> where Z and Q are input matrices. + !> If Q and Z are the unitary factors from the generalized Schur + !> factorization of a matrix pair (A,B), then Z*X and Q*Y + !> are the matrices of right and left eigenvectors of (A,B). + + pure subroutine stdlib_ztgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, & + mm, m, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldp, lds, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: p(ldp,*), s(lds,*) + complex(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb + integer(ilp) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr + real(dp) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, & + safmin, sbeta, scale, small, temp, ulp, xmax + complex(dp) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode and test the input parameters + if( stdlib_lsame( howmny, 'A' ) ) then + ihwmny = 1 + ilall = .true. + ilback = .false. + else if( stdlib_lsame( howmny, 'S' ) ) then + ihwmny = 2 + ilall = .false. + ilback = .false. + else if( stdlib_lsame( howmny, 'B' ) ) then + ihwmny = 3 + ilall = .true. + ilback = .true. + else + ihwmny = -1 + end if + if( stdlib_lsame( side, 'R' ) ) then + iside = 1 + compl = .false. + compr = .true. + else if( stdlib_lsame( side, 'L' ) ) then + iside = 2 + compl = .true. + compr = .false. + else if( stdlib_lsame( side, 'B' ) ) then + iside = 3 + compl = .true. + compr = .true. + else + iside = -1 + end if + info = 0 + if( iside<0 ) then + info = -1 + else if( ihwmny<0 ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lds=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )bignum*temp ) then + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = one + end if + suma = czero + sumb = czero + do jr = je, j - 1 + suma = suma + conjg( s( jr, j ) )*work( jr ) + sumb = sumb + conjg( p( jr, j ) )*work( jr ) + end do + sum = acoeff*suma - conjg( bcoeff )*sumb + ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) ) + ! with scaling and perturbation of the denominator + d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) ) + if( abs1( d )<=dmin )d = cmplx( dmin,KIND=dp) + if( abs1( d )=bignum*abs1( d ) ) then + temp = one / abs1( sum ) + do jr = je, j - 1 + work( jr ) = temp*work( jr ) + end do + xmax = temp*xmax + sum = temp*sum + end if + end if + work( j ) = stdlib_zladiv( -sum, d ) + xmax = max( xmax, abs1( work( j ) ) ) + end do loop_100 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_zgemv( 'N', n, n+1-je, cone, vl( 1, je ), ldvl,work( je ), 1, & + czero, work( n+1 ), 1 ) + isrc = 2 + ibeg = 1 + else + isrc = 1 + ibeg = je + end if + ! copy and scale eigenvector into column of vl + xmax = zero + do jr = ibeg, n + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = ibeg, n + vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + ibeg = n + 1 + end if + do jr = 1, ibeg - 1 + vl( jr, ieig ) = czero + end do + end if + end do loop_140 + end if + ! right eigenvectors + if( compr ) then + ieig = im + 1 + ! main loop over eigenvalues + loop_250: do je = n, 1, -1 + if( ilall ) then + ilcomp = .true. + else + ilcomp = select( je ) + end if + if( ilcomp ) then + ieig = ieig - 1 + if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=dp) )& + <=safmin ) then + ! singular matrix pencil -- return unit eigenvector + do jr = 1, n + vr( jr, ieig ) = czero + end do + vr( ieig, ieig ) = cone + cycle loop_250 + end if + ! non-singular eigenvalue: + ! compute coefficients a and b in + ! ( a a - b b ) x = 0 + temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=dp) )& + *bscale, safmin ) + salpha = ( temp*s( je, je ) )*ascale + sbeta = ( temp*real( p( je, je ),KIND=dp) )*bscale + acoeff = sbeta*ascale + bcoeff = salpha*bscale + ! scale to avoid underflow + lsa = abs( sbeta )>=safmin .and. abs( acoeff )=safmin .and. abs1( bcoeff )=bignum*abs1( d ) ) then + temp = one / abs1( work( j ) ) + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + work( j ) = stdlib_zladiv( -work( j ), d ) + if( j>1 ) then + ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling + if( abs1( work( j ) )>one ) then + temp = one / abs1( work( j ) ) + if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then + do jr = 1, je + work( jr ) = temp*work( jr ) + end do + end if + end if + ca = acoeff*work( j ) + cb = bcoeff*work( j ) + do jr = 1, j - 1 + work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j ) + end do + end if + end do loop_210 + ! back transform eigenvector if howmny='b'. + if( ilback ) then + call stdlib_zgemv( 'N', n, je, cone, vr, ldvr, work, 1,czero, work( n+1 ), & + 1 ) + isrc = 2 + iend = n + else + isrc = 1 + iend = je + end if + ! copy and scale eigenvector into column of vr + xmax = zero + do jr = 1, iend + xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) ) + end do + if( xmax>safmin ) then + temp = one / xmax + do jr = 1, iend + vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr ) + end do + else + iend = 0 + end if + do jr = iend + 1, n + vr( jr, ieig ) = czero + end do + end if + end do loop_250 + end if + return + end subroutine stdlib_ztgevc + + !> ZTGEX2: swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) + !> in an upper triangular matrix pair (A, B) by an unitary equivalence + !> transformation. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_ztgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: j1, lda, ldb, ldq, ldz, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: twenty = 2.0e+1_dp + integer(ilp), parameter :: ldst = 2 + logical(lk), parameter :: wands = .true. + + + + + ! Local Scalars + logical(lk) :: strong, weak + integer(ilp) :: i, m + real(dp) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb + complex(dp) :: cdum, f, g, sq, sz + ! Local Arrays + complex(dp) :: s(ldst,ldst), t(ldst,ldst), work(8) + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,sqrt + ! Executable Statements + info = 0 + ! quick return if possible + if( n<=1 )return + m = ldst + weak = .false. + strong = .false. + ! make a local copy of selected block in (a, b) + call stdlib_zlacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst ) + call stdlib_zlacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst ) + ! compute the threshold for testing the acceptance of swapping. + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + scale = real( czero,KIND=dp) + sum = real( cone,KIND=dp) + call stdlib_zlacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_zlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_zlassq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=dp) + sum = real( cone,KIND=dp) + call stdlib_zlassq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + ! thres has been changed from + ! thresh = max( ten*eps*sa, smlnum ) + ! to + ! thresh = max( twenty*eps*sa, smlnum ) + ! on 04/01/10. + ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by + ! jim demmel and guillaume revy. see forum post 1783. + thresha = max( twenty*eps*sa, smlnum ) + threshb = max( twenty*eps*sb, smlnum ) + ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks + ! using givens rotations and perform the swap tentatively. + f = s( 2, 2 )*t( 1, 1 ) - t( 2, 2 )*s( 1, 1 ) + g = s( 2, 2 )*t( 1, 2 ) - t( 2, 2 )*s( 1, 2 ) + sa = abs( s( 2, 2 ) ) * abs( t( 1, 1 ) ) + sb = abs( s( 1, 1 ) ) * abs( t( 2, 2 ) ) + call stdlib_zlartg( g, f, cz, sz, cdum ) + sz = -sz + call stdlib_zrot( 2, s( 1, 1 ), 1, s( 1, 2 ), 1, cz, conjg( sz ) ) + call stdlib_zrot( 2, t( 1, 1 ), 1, t( 1, 2 ), 1, cz, conjg( sz ) ) + if( sa>=sb ) then + call stdlib_zlartg( s( 1, 1 ), s( 2, 1 ), cq, sq, cdum ) + else + call stdlib_zlartg( t( 1, 1 ), t( 2, 1 ), cq, sq, cdum ) + end if + call stdlib_zrot( 2, s( 1, 1 ), ldst, s( 2, 1 ), ldst, cq, sq ) + call stdlib_zrot( 2, t( 1, 1 ), ldst, t( 2, 1 ), ldst, cq, sq ) + ! weak stability test: |s21| <= o(eps f-norm((a))) + ! and |t21| <= o(eps f-norm((b))) + weak = abs( s( 2, 1 ) )<=thresha .and.abs( t( 2, 1 ) )<=threshb + if( .not.weak )go to 20 + if( wands ) then + ! strong stability test: + ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a))) + ! and + ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b))) + call stdlib_zlacpy( 'FULL', m, m, s, ldst, work, m ) + call stdlib_zlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m ) + call stdlib_zrot( 2, work, 1, work( 3 ), 1, cz, -conjg( sz ) ) + call stdlib_zrot( 2, work( 5 ), 1, work( 7 ), 1, cz, -conjg( sz ) ) + call stdlib_zrot( 2, work, 2, work( 2 ), 2, cq, -sq ) + call stdlib_zrot( 2, work( 5 ), 2, work( 6 ), 2, cq, -sq ) + do i = 1, 2 + work( i ) = work( i ) - a( j1+i-1, j1 ) + work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 ) + work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 ) + work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 ) + end do + scale = real( czero,KIND=dp) + sum = real( cone,KIND=dp) + call stdlib_zlassq( m*m, work, 1, scale, sum ) + sa = scale*sqrt( sum ) + scale = real( czero,KIND=dp) + sum = real( cone,KIND=dp) + call stdlib_zlassq( m*m, work(m*m+1), 1, scale, sum ) + sb = scale*sqrt( sum ) + strong = sa<=thresha .and. sb<=threshb + if( .not.strong )go to 20 + end if + ! if the swap is accepted ("weakly" and "strongly"), apply the + ! equivalence transformations to the original matrix pair (a,b) + call stdlib_zrot( j1+1, a( 1, j1 ), 1, a( 1, j1+1 ), 1, cz,conjg( sz ) ) + call stdlib_zrot( j1+1, b( 1, j1 ), 1, b( 1, j1+1 ), 1, cz,conjg( sz ) ) + call stdlib_zrot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq ) + call stdlib_zrot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq ) + ! set n1 by n2 (2,1) blocks to 0 + a( j1+1, j1 ) = czero + b( j1+1, j1 ) = czero + ! accumulate transformations into q and z if requested. + if( wantz )call stdlib_zrot( n, z( 1, j1 ), 1, z( 1, j1+1 ), 1, cz,conjg( sz ) ) + + if( wantq )call stdlib_zrot( n, q( 1, j1 ), 1, q( 1, j1+1 ), 1, cq,conjg( sq ) ) + + ! exit with info = 0 if swap was successfully performed. + return + ! exit with info = 1 if swap was rejected. + 20 continue + info = 1 + return + end subroutine stdlib_ztgex2 + + !> ZTGEXC: reorders the generalized Schur decomposition of a complex + !> matrix pair (A,B), using an unitary equivalence transformation + !> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with + !> row index IFST is moved to row ILST. + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + !> Optionally, the matrices Q and Z of generalized Schur vectors are + !> updated. + !> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H + !> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H + + pure subroutine stdlib_ztgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ifst, lda, ldb, ldq, ldz, n + integer(ilp), intent(inout) :: ilst + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: here + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! decode and test input arguments. + info = 0 + if( n<0 ) then + info = -3 + else if( ldan ) then + info = -12 + else if( ilst<1 .or. ilst>n ) then + info = -13 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTGEXC', -info ) + return + end if + ! quick return if possible + if( n<=1 )return + if( ifst==ilst )return + if( ifst=ilst )go to 20 + here = here + 1 + end if + ilst = here + return + end subroutine stdlib_ztgexc + + !> ZTPLQT2: computes a LQ a factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_ztplqt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda ZTPQRT2: computes a QR factorization of a complex "triangular-pentagonal" + !> matrix C, which is composed of a triangular block A and pentagonal block B, + !> using the compact WY representation for Q. + + pure subroutine stdlib_ztpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, p, mp, np + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. l>min(m,n) ) then + info = -3 + else if( lda ZTPRFB: applies a complex "triangular-pentagonal" block reflector H or its + !> conjugate transpose H**H to a complex matrix C, which is composed of two + !> blocks A and B, either from the left or right. + + pure subroutine stdlib_ztprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, & + lda, b, ldb, work, ldwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: direct, side, storev, trans + integer(ilp), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(in) :: t(ldt,*), v(ldv,*) + complex(dp), intent(out) :: work(ldwork,*) + ! ========================================================================== + + ! Local Scalars + integer(ilp) :: i, j, mp, np, kp + logical(lk) :: left, forward, column, right, backward, row + ! Intrinsic Functions + intrinsic :: conjg + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return + if( stdlib_lsame( storev, 'C' ) ) then + column = .true. + row = .false. + else if ( stdlib_lsame( storev, 'R' ) ) then + column = .false. + row = .true. + else + column = .false. + row = .false. + end if + if( stdlib_lsame( side, 'L' ) ) then + left = .true. + right = .false. + else if( stdlib_lsame( side, 'R' ) ) then + left = .false. + right = .true. + else + left = .false. + right = .false. + end if + if( stdlib_lsame( direct, 'F' ) ) then + forward = .true. + backward = .false. + else if( stdlib_lsame( direct, 'B' ) ) then + forward = .false. + backward = .true. + else + forward = .false. + backward = .false. + end if + ! --------------------------------------------------------------------------- + if( column .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (m-by-k) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + call stdlib_zgemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork ) + + call stdlib_zgemm( 'C', 'N', k-l, n, m, cone, v( 1, kp ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_zgemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_ztrmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1 ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i ] (k-by-k) + ! [ v ] (n-by-k) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_ztrmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + call stdlib_zgemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork ) + + call stdlib_zgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1, kp ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_zgemm( 'N', 'C', m, l, k-l, -cone, work( 1, kp ), ldwork,v( np, kp ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_ztrmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1 ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (m-by-k) + ! [ i ] (k-by-k) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - t (a + v**h b) or a = a - t**h (a + v**h b) + ! b = b - v t (a + v**h b) or b = b - v t**h (a + v**h b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_zgemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_zgemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1 ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_zgemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_ztrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, kp ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( column .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v ] (n-by-k) + ! [ i ] (k-by-k) + ! form c h or c h**h where c = [ b a ] (b is m-by-n, a is m-by-k) + ! h = i - w t w**h or h**h = i - w t**h w**h + ! a = a - (a + b v) t or a = a - (a + b v) t**h + ! b = b - (a + b v) t v**h or b = b - (a + b v) t**h v**h + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_zgemm( 'N', 'N', m, l, n-l, cone, b( 1, np ), ldb,v( np, kp ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_zgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1 ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_zgemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_ztrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, kp ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ a ] (k-by-n) + ! [ b ] (m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( m-l+1, m ) + kp = min( l+1, k ) + do j = 1, n + do i = 1, l + work( i, j ) = b( m-l+i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1, mp ), ldv,work, ldb ) + + call stdlib_zgemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork ) + + call stdlib_zgemm( 'N', 'N', k-l, n, m, cone, v( kp, 1 ), ldv,b, ldb, czero, work( & + kp, 1 ), ldwork ) + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb ) + + call stdlib_zgemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1 ), & + ldwork, cone, b( mp, 1 ), ldb ) + call stdlib_ztrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1, mp ), ldv,work, ldwork ) + + do j = 1, n + do i = 1, l + b( m-l+i, j ) = b( m-l+i, j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. forward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ i v ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ a b ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( n-l+1, n ) + kp = min( l+1, k ) + do j = 1, l + do i = 1, m + work( i, j ) = b( i, n-l+j ) + end do + end do + call stdlib_ztrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + call stdlib_zgemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork ) + + call stdlib_zgemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1 ), ldv, czero, work( & + 1, kp ), ldwork ) + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_zgemm( 'N', 'N', m, l, k-l, -cone, work( 1, kp ), ldwork,v( kp, np ), & + ldv, cone, b( 1, np ), ldb ) + call stdlib_ztrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1, np ), ldv,work, ldwork ) + + do j = 1, l + do i = 1, m + b( i, n-l+j ) = b( i, n-l+j ) - work( i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. left ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-m ) + ! form h c or h**h c where c = [ b ] (m-by-n) + ! [ a ] (k-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - t (a + v b) or a = a - t**h (a + v b) + ! b = b - v**h t (a + v b) or b = b - v**h t**h (a + v b) + ! --------------------------------------------------------------------------- + mp = min( l+1, m ) + kp = min( k-l+1, k ) + do j = 1, n + do i = 1, l + work( k-l+i, j ) = b( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + call stdlib_zgemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1 ), ldb, & + cone, work( kp, 1 ), ldwork ) + call stdlib_zgemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork ) + + do j = 1, n + do i = 1, k + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork ) + do j = 1, n + do i = 1, k + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'C', 'N', m-l, n, k, -cone, v( 1, mp ), ldv,work, ldwork, cone, & + b( mp, 1 ), ldb ) + call stdlib_zgemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb ) + + call stdlib_ztrmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1 ), ldv,work( kp, 1 ), & + ldwork ) + do j = 1, n + do i = 1, l + b( i, j ) = b( i, j ) - work( k-l+i, j ) + end do + end do + ! --------------------------------------------------------------------------- + else if( row .and. backward .and. right ) then + ! --------------------------------------------------------------------------- + ! let w = [ v i ] ( i is k-by-k, v is k-by-n ) + ! form c h or c h**h where c = [ b a ] (a is m-by-k, b is m-by-n) + ! h = i - w**h t w or h**h = i - w**h t**h w + ! a = a - (a + b v**h) t or a = a - (a + b v**h) t**h + ! b = b - (a + b v**h) t v or b = b - (a + b v**h) t**h v + ! --------------------------------------------------------------------------- + np = min( l+1, n ) + kp = min( k-l+1, k ) + do j = 1, l + do i = 1, m + work( i, k-l+j ) = b( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + call stdlib_zgemm( 'N', 'C', m, l, n-l, cone, b( 1, np ), ldb,v( kp, np ), ldv, & + cone, work( 1, kp ), ldwork ) + call stdlib_zgemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork ) + + do j = 1, k + do i = 1, m + work( i, j ) = work( i, j ) + a( i, j ) + end do + end do + call stdlib_ztrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork ) + do j = 1, k + do i = 1, m + a( i, j ) = a( i, j ) - work( i, j ) + end do + end do + call stdlib_zgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1, np ), ldv, cone, & + b( 1, np ), ldb ) + call stdlib_zgemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb ) + + call stdlib_ztrmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1 ), ldv,work( 1, kp ), & + ldwork ) + do j = 1, l + do i = 1, m + b( i, j ) = b( i, j ) - work( i, k-l+j ) + end do + end do + end if + return + end subroutine stdlib_ztprfb + + !> ZTPRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular packed + !> coefficient matrix. + !> The solution matrix X must be computed by ZTPTRS or some other + !> means before entering this routine. ZTPRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ztprfs( uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx,ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: ap(*), b(ldb,*), x(ldx,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, kc, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ztpsv( uplo, transt, diag, n, ap, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ztpsv( uplo, transn, diag, n, ap, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ztprfs + + !> ZTPTRI: computes the inverse of a complex upper or lower triangular + !> matrix A stored in packed format. + + pure subroutine stdlib_ztptri( uplo, diag, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc, jclast, jj + complex(dp) :: ajj + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPTRI', -info ) + return + end if + ! check for singularity if non-unit. + if( nounit ) then + if( upper ) then + jj = 0 + do info = 1, n + jj = jj + info + if( ap( jj )==czero )return + end do + else + jj = 1 + do info = 1, n + if( ap( jj )==czero )return + jj = jj + n - info + 1 + end do + end if + info = 0 + end if + if( upper ) then + ! compute inverse of upper triangular matrix. + jc = 1 + do j = 1, n + if( nounit ) then + ap( jc+j-1 ) = cone / ap( jc+j-1 ) + ajj = -ap( jc+j-1 ) + else + ajj = -cone + end if + ! compute elements 1:j-1 of j-th column. + call stdlib_ztpmv( 'UPPER', 'NO TRANSPOSE', diag, j-1, ap,ap( jc ), 1 ) + call stdlib_zscal( j-1, ajj, ap( jc ), 1 ) + jc = jc + j + end do + else + ! compute inverse of lower triangular matrix. + jc = n*( n+1 ) / 2 + do j = n, 1, -1 + if( nounit ) then + ap( jc ) = cone / ap( jc ) + ajj = -ap( jc ) + else + ajj = -cone + end if + if( j ZTPTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N stored in packed format, + !> and B is an N-by-NRHS matrix. A check is made to verify that A is + !> nonsingular. + + pure subroutine stdlib_ztptrs( uplo, trans, diag, n, nrhs, ap, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jc + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldb ZTPTTF: copies a triangular matrix A from standard packed format (TP) + !> to rectangular full packed format (TF). + + pure subroutine stdlib_ztpttf( transr, uplo, n, ap, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(in) :: ap(0:*) + complex(dp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k, nt + integer(ilp) :: i, j, ij + integer(ilp) :: ijp, jp, lda, js + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPTTF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + if( n==1 ) then + if( normaltransr ) then + arf( 0 ) = ap( 0 ) + else + arf( 0 ) = conjg( ap( 0 ) ) + end if + return + end if + ! size of array arf(0:nt-1) + nt = n*( n+1 ) / 2 + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe) + ! where noe = 0 if n is even, noe = 1 if n is odd + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + lda = n + 1 + else + nisodd = .true. + lda = n + end if + ! arf^c has lda rows and n+1-noe cols + if( .not.normaltransr )lda = ( n+1 ) / 2 + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n + ijp = 0 + jp = 0 + do j = 0, n2 + do i = j, n - 1 + ij = i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, n2 - 1 + do j = 1 + i, n2 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + ijp = 0 + do j = 0, n1 - 1 + ij = n2 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = n1, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ijp = 0 + do i = 0, n2 + do ij = i*( lda+1 ), n*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 1 + do j = 0, n2 - 1 + do ij = js, js + n2 - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + ijp = 0 + js = n2*lda + do j = 0, n1 - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, n1 + do ij = i, i + ( n1+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + ijp = 0 + jp = 0 + do j = 0, k - 1 + do i = j, n - 1 + ij = 1 + i + jp + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + jp = jp + lda + end do + do i = 0, k - 1 + do j = i, k - 1 + ij = i + j*lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + ijp = 0 + do j = 0, k - 1 + ij = k + 1 + j + do i = 0, j + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + ij = ij + lda + end do + end do + js = 0 + do j = k, n - 1 + ij = js + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + ijp = 0 + do i = 0, k - 1 + do ij = i + ( i+1 )*lda, ( n+1 )*lda - 1, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + js = 0 + do j = 0, k - 1 + do ij = js, js + k - j - 1 + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + 1 + end do + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + ijp = 0 + js = ( k+1 )*lda + do j = 0, k - 1 + do ij = js, js + j + arf( ij ) = ap( ijp ) + ijp = ijp + 1 + end do + js = js + lda + end do + do i = 0, k - 1 + do ij = i, i + ( k+i )*lda, lda + arf( ij ) = conjg( ap( ijp ) ) + ijp = ijp + 1 + end do + end do + end if + end if + end if + return + end subroutine stdlib_ztpttf + + !> ZTPTTR: copies a triangular matrix A from standard packed format (TP) + !> to standard full format (TR). + + pure subroutine stdlib_ztpttr( uplo, n, ap, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(dp), intent(out) :: a(lda,*) + complex(dp), intent(in) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZTREVC: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + + pure subroutine stdlib_ztrevc( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + complex(dp), parameter :: cmzero = (0.0e+0_dp,0.0e+0_dp) + complex(dp), parameter :: cmone = (1.0e+0_dp,0.0e+0_dp) + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki + real(dp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt1 ) then + call stdlib_zlatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 ), scale, rwork,info ) + work( ki ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + call stdlib_zcopy( ki, work( 1 ), 1, vr( 1, is ), 1 ) + ii = stdlib_izamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_zdscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = cmzero + end do + else + if( ki>1 )call stdlib_zgemv( 'N', n, ki-1, cmone, vr, ldvr, work( 1 ),1, & + cmplx( scale,KIND=dp), vr( 1, ki ), 1 ) + ii = stdlib_izamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_zdscal( n, remax, vr( 1, ki ), 1 ) + end if + ! set back the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k+n ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! compute left eigenvectors. + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + work( n ) = cmone + ! form right-hand side. + do k = ki + 1, n + work( k ) = -conjg( t( ki, k ) ) + end do + ! solve the triangular system: + ! (t(ki+1:n,ki+1:n) - t(ki,ki))**h * x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) ZTREVC3: computes some or all of the right and/or left eigenvectors of + !> a complex upper triangular matrix T. + !> Matrices of this type are produced by the Schur factorization of + !> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. + !> The right eigenvector x and the left eigenvector y of T corresponding + !> to an eigenvalue w are defined by: + !> T*x = w*x, (y**H)*T = w*(y**H) + !> where y**H denotes the conjugate transpose of the vector y. + !> The eigenvalues are not input to this routine, but are read directly + !> from the diagonal of T. + !> This routine returns the matrices X and/or Y of right and left + !> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + !> input matrix. If Q is the unitary factor that reduces a matrix A to + !> Schur form T, then Q*X and Q*Y are the matrices of right and left + !> eigenvectors of A. + !> This uses a Level 3 BLAS version of the back transformation. + + pure subroutine stdlib_ztrevc3( side, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, mm, m, & + work, lwork, rwork, lrwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, lwork, lrwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmin = 8 + integer(ilp), parameter :: nbmax = 128 + + + + ! Local Scalars + logical(lk) :: allv, bothv, leftv, lquery, over, rightv, somev + integer(ilp) :: i, ii, is, j, k, ki, iv, maxwrk, nb + real(dp) :: ovfl, remax, scale, smin, smlnum, ulp, unfl + complex(dp) :: cdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + allv = stdlib_lsame( howmny, 'A' ) + over = stdlib_lsame( howmny, 'B' ) + somev = stdlib_lsame( howmny, 'S' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + if( somev ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + nb = stdlib_ilaenv( 1, 'ZTREVC', side // howmny, n, -1, -1, -1 ) + maxwrk = n + 2*n*nb + work(1) = maxwrk + rwork(1) = n + lquery = ( lwork==-1 .or. lrwork==-1 ) + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.allv .and. .not.over .and. .not.somev ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt= n + 2*n*nbmin ) then + nb = (lwork - n) / (2*n) + nb = min( nb, nbmax ) + call stdlib_zlaset( 'F', n, 1+2*nb, czero, czero, work, n ) + else + nb = 1 + end if + ! set the constants to control overflow. + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + ovfl = one / unfl + call stdlib_dlabad( unfl, ovfl ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = unfl*( n / ulp ) + ! store the diagonal elements of t in working array work. + do i = 1, n + work( i ) = t( i, i ) + end do + ! compute 1-norm of each column of strictly upper triangular + ! part of t to control overflow in triangular solver. + rwork( 1 ) = zero + do j = 2, n + rwork( j ) = stdlib_dzasum( j-1, t( 1, j ), 1 ) + end do + if( rightv ) then + ! ============================================================ + ! compute right eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=nb=1; + ! blocked version starts with iv=nb, goes down to 1. + ! (note the "0-th" column is used to store the original diagonal.) + iv = nb + is = m + loop_80: do ki = n, 1, -1 + if( somev ) then + if( .not.select( ki ) )cycle loop_80 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex right eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = 1, ki - 1 + work( k + iv*n ) = -t( k, ki ) + end do + ! solve upper triangular system: + ! [ t(1:ki-1,1:ki-1) - t(ki,ki) ]*x = scale*work. + do k = 1, ki - 1 + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) )1 ) then + call stdlib_zlatrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', 'Y',ki-1, t, ldt, & + work( 1 + iv*n ), scale,rwork, info ) + work( ki + iv*n ) = scale + end if + ! copy the vector x or q*x to vr and normalize. + if( .not.over ) then + ! ------------------------------ + ! no back-transform: copy x to vr and normalize. + call stdlib_zcopy( ki, work( 1 + iv*n ), 1, vr( 1, is ), 1 ) + ii = stdlib_izamax( ki, vr( 1, is ), 1 ) + remax = one / cabs1( vr( ii, is ) ) + call stdlib_zdscal( ki, remax, vr( 1, is ), 1 ) + do k = ki + 1, n + vr( k, is ) = czero + end do + else if( nb==1 ) then + ! ------------------------------ + ! version 1: back-transform each vector with gemv, q*x. + if( ki>1 )call stdlib_zgemv( 'N', n, ki-1, cone, vr, ldvr,work( 1 + iv*n ), 1,& + cmplx( scale,KIND=dp),vr( 1, ki ), 1 ) + ii = stdlib_izamax( n, vr( 1, ki ), 1 ) + remax = one / cabs1( vr( ii, ki ) ) + call stdlib_zdscal( n, remax, vr( 1, ki ), 1 ) + else + ! ------------------------------ + ! version 2: back-transform block of vectors with gemm + ! zero out below vector + do k = ki + 1, n + work( k + iv*n ) = czero + end do + ! columns iv:nb of work are valid vectors. + ! when the number of vectors stored reaches nb, + ! or if this was last vector, do the gemm + if( (iv==1) .or. (ki==1) ) then + call stdlib_zgemm( 'N', 'N', n, nb-iv+1, ki+nb-iv, cone,vr, ldvr,work( 1 + & + (iv)*n ), n,czero,work( 1 + (nb+iv)*n ), n ) + ! normalize vectors + do k = iv, nb + ii = stdlib_izamax( n, work( 1 + (nb+k)*n ), 1 ) + remax = one / cabs1( work( ii + (nb+k)*n ) ) + call stdlib_zdscal( n, remax, work( 1 + (nb+k)*n ), 1 ) + end do + call stdlib_zlacpy( 'F', n, nb-iv+1,work( 1 + (nb+iv)*n ), n,vr( 1, ki ), & + ldvr ) + iv = nb + else + iv = iv - 1 + end if + end if + ! restore the original diagonal elements of t. + do k = 1, ki - 1 + t( k, k ) = work( k ) + end do + is = is - 1 + end do loop_80 + end if + if( leftv ) then + ! ============================================================ + ! compute left eigenvectors. + ! iv is index of column in current block. + ! non-blocked version always uses iv=1; + ! blocked version starts with iv=1, goes up to nb. + ! (note the "0-th" column is used to store the original diagonal.) + iv = 1 + is = 1 + loop_130: do ki = 1, n + if( somev ) then + if( .not.select( ki ) )cycle loop_130 + end if + smin = max( ulp*( cabs1( t( ki, ki ) ) ), smlnum ) + ! -------------------------------------------------------- + ! complex left eigenvector + work( ki + iv*n ) = cone + ! form right-hand side. + do k = ki + 1, n + work( k + iv*n ) = -conjg( t( ki, k ) ) + end do + ! solve conjugate-transposed triangular system: + ! [ t(ki+1:n,ki+1:n) - t(ki,ki) ]**h * x = scale*work. + do k = ki + 1, n + t( k, k ) = t( k, k ) - t( ki, ki ) + if( cabs1( t( k, k ) ) ZTREXC: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that the diagonal element of T with row index IFST + !> is moved to row ILST. + !> The Schur form T is reordered by a unitary similarity transformation + !> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + !> postmultplying it with Z. + + pure subroutine stdlib_ztrexc( compq, n, t, ldt, q, ldq, ifst, ilst, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq + integer(ilp), intent(in) :: ifst, ilst, ldq, ldt, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: q(ldq,*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: wantq + integer(ilp) :: k, m1, m2, m3 + real(dp) :: cs + complex(dp) :: sn, t11, t22, temp + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + wantq = stdlib_lsame( compq, 'V' ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldtn ).and.( n>0 )) then + info = -7 + else if(( ilst<1 .or. ilst>n ).and.( n>0 )) then + info = -8 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTREXC', -info ) + return + end if + ! quick return if possible + if( n<=1 .or. ifst==ilst )return + if( ifst ZTRRFS: provides error bounds and backward error estimates for the + !> solution to a system of linear equations with a triangular + !> coefficient matrix. + !> The solution matrix X must be computed by ZTRTRS or some other + !> means before entering this routine. ZTRRFS does not do iterative + !> refinement because doing so cannot improve the backward error. + + pure subroutine stdlib_ztrrfs( uplo, trans, diag, n, nrhs, a, lda, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), b(ldb,*), x(ldx,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: notran, nounit, upper + character :: transn, transt + integer(ilp) :: i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + notran = stdlib_lsame( trans, 'N' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 210 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_ztrsv( uplo, transt, diag, n, a, lda, work, 1 ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_ztrsv( uplo, transn, diag, n, a, lda, work, 1 ) + end if + go to 210 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_250 + return + end subroutine stdlib_ztrrfs + + !> ZTRSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or right eigenvectors of a complex upper triangular + !> matrix T (or of any matrix Q*T*Q**H with Q unitary). + + pure subroutine stdlib_ztrsna( job, howmny, select, n, t, ldt, vl, ldvl, vr,ldvr, s, sep, mm,& + m, work, ldwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldt, ldvl, ldvr, ldwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + real(dp), intent(out) :: rwork(*), s(*), sep(*) + complex(dp), intent(in) :: t(ldt,*), vl(ldvl,*), vr(ldvr,*) + complex(dp), intent(out) :: work(ldwork,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: somcon, wantbh, wants, wantsp + character :: normin + integer(ilp) :: i, ierr, ix, j, k, kase, ks + real(dp) :: bignum, eps, est, lnrm, rnrm, scale, smlnum, xnorm + complex(dp) :: cdum, prod + ! Local Arrays + integer(ilp) :: isave(3) + complex(dp) :: dummy(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + ! set m to the number of eigenpairs for which condition numbers are + ! to be computed. + if( somcon ) then + m = 0 + do j = 1, n + if( select( j ) )m = m + 1 + end do + else + m = n + end if + info = 0 + if( .not.wants .and. .not.wantsp ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt ZTRTI2: computes the inverse of a complex upper or lower triangular + !> matrix. + !> This is the Level 2 BLAS version of the algorithm. + + pure subroutine stdlib_ztrti2( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j + complex(dp) :: ajj + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZTRTRI: computes the inverse of a complex upper or lower triangular + !> matrix A. + !> This is the Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_ztrtri( uplo, diag, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, upper + integer(ilp) :: j, jb, nb, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_ztrti2( uplo, diag, n, a, lda, info ) + else + ! use blocked code + if( upper ) then + ! compute inverse of upper triangular matrix + do j = 1, n, nb + jb = min( nb, n-j+1 ) + ! compute rows 1:j-1 of current block column + call stdlib_ztrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, cone, a, & + lda, a( 1, j ), lda ) + call stdlib_ztrsm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', diag, j-1,jb, -cone, a( & + j, j ), lda, a( 1, j ), lda ) + ! compute inverse of current diagonal block + call stdlib_ztrti2( 'UPPER', diag, jb, a( j, j ), lda, info ) + end do + else + ! compute inverse of lower triangular matrix + nn = ( ( n-1 ) / nb )*nb + 1 + do j = nn, 1, -nb + jb = min( nb, n-j+1 ) + if( j+jb<=n ) then + ! compute rows j+jb:n of current block column + call stdlib_ztrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, & + cone, a( j+jb, j+jb ), lda,a( j+jb, j ), lda ) + call stdlib_ztrsm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', diag,n-j-jb+1, jb, -& + cone, a( j, j ), lda,a( j+jb, j ), lda ) + end if + ! compute inverse of current diagonal block + call stdlib_ztrti2( 'LOWER', diag, jb, a( j, j ), lda, info ) + end do + end if + end if + return + end subroutine stdlib_ztrtri + + !> ZTRTRS: solves a triangular system of the form + !> A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a triangular matrix of order N, and B is an N-by-NRHS + !> matrix. A check is made to verify that A is nonsingular. + + pure subroutine stdlib_ztrtrs( uplo, trans, diag, n, nrhs, a, lda, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nounit = stdlib_lsame( diag, 'N' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) .and. & + .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( lda ZTRTTF: copies a triangular matrix A from standard full format (TR) + !> to rectangular full packed format (TF) . + + pure subroutine stdlib_ztrttf( transr, uplo, n, a, lda, arf, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(dp), intent(in) :: a(0:lda-1,0:*) + complex(dp), intent(out) :: arf(0:*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2 + ! Intrinsic Functions + intrinsic :: conjg,max,mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda=n + ij = 0 + do j = 0, n2 + do i = n1, n2 + j + arf( ij ) = conjg( a( n2+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0); lda=n + ij = nt - n + do j = n - 1, n1, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - n1, n1 - 1 + arf( ij ) = conjg( a( j-n1, l ) ) + ij = ij + 1 + end do + ij = ij - nx2 + end do + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + ij = 0 + do j = 0, n2 - 1 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = n1 + j, n - 1 + arf( ij ) = a( i, n1+j ) + ij = ij + 1 + end do + end do + do j = n2, n - 1 + do i = 0, n1 - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda=n2 + ij = 0 + do j = 0, n1 + do i = n1, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, n1 - 1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = n2 + j, n - 1 + arf( ij ) = conjg( a( n2+j, l ) ) + ij = ij + 1 + end do + end do + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1); lda=n+1 + ij = 0 + do j = 0, k - 1 + do i = k, k + j + arf( ij ) = conjg( a( k+j, i ) ) + ij = ij + 1 + end do + do i = j, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0); lda=n+1 + ij = nt - n - 1 + do j = n - 1, k, -1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = j - k, k - 1 + arf( ij ) = conjg( a( j-k, l ) ) + ij = ij + 1 + end do + ij = ij - np1x2 + end do + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper, a=b) + ! t1 -> a(0,1) , t2 -> a(0,0) , s -> a(0,k+1) : + ! t1 -> a(0+k) , t2 -> a(0+0) , s -> a(0+k*(k+1)); lda=k + ij = 0 + j = k + do i = k, n - 1 + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + do i = k + 1 + j, n - 1 + arf( ij ) = a( i, k+1+j ) + ij = ij + 1 + end do + end do + do j = k - 1, n - 1 + do i = 0, k - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + else + ! srpa for upper, transpose and n is even (see paper, a=b) + ! t1 -> a(0,k+1) , t2 -> a(0,k) , s -> a(0,0) + ! t1 -> a(0+k*(k+1)) , t2 -> a(0+k*k) , s -> a(0+0)); lda=k + ij = 0 + do j = 0, k + do i = k, n - 1 + arf( ij ) = conjg( a( j, i ) ) + ij = ij + 1 + end do + end do + do j = 0, k - 2 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + do l = k + 1 + j, n - 1 + arf( ij ) = conjg( a( k+1+j, l ) ) + ij = ij + 1 + end do + end do + ! note that here j = k-1 + do i = 0, j + arf( ij ) = a( i, j ) + ij = ij + 1 + end do + end if + end if + end if + return + end subroutine stdlib_ztrttf + + !> ZTRTTP: copies a triangular matrix A from full format (TR) to standard + !> packed format (TP). + + pure subroutine stdlib_ztrttp( uplo, n, a, lda, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n, lda + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: ap(*) + ! ===================================================================== + ! Parameters + ! Local Scalars + logical(lk) :: lower + integer(ilp) :: i, j, k + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZTZRZF: reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A + !> to upper triangular form by means of unitary transformations. + !> The upper trapezoidal matrix A is factored as + !> A = ( R 0 ) * Z, + !> where Z is an N-by-N unitary matrix and R is an M-by-M upper + !> triangular matrix. + + pure subroutine stdlib_ztzrzf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iws, ki, kk, ldwork, lwkmin, lwkopt, m1, mu, nb, nbmin, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarzt( 'BACKWARD', 'ROWWISE', n-m, ib, a( i, m1 ),lda, tau( i ), & + work, ldwork ) + ! apply h to a(1:i-1,i:n) from the right + call stdlib_zlarzb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', i-1, n-i+1,& + ib, n-m, a( i, m1 ),lda, work, ldwork, a( 1, i ), lda,work( ib+1 ), ldwork ) + + end if + end do + mu = i + nb - 1 + else + mu = m + end if + ! use unblocked code to factor the last or only block + if( mu>0 )call stdlib_zlatrz( mu, n, n-m, a, lda, tau, work ) + work( 1 ) = lwkopt + return + end subroutine stdlib_ztzrzf + + !> ZUNBDB: simultaneously bidiagonalizes the blocks of an M-by-M + !> partitioned unitary matrix X: + !> [ B11 | B12 0 0 ] + !> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H + !> X = [-----------] = [---------] [----------------] [---------] . + !> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] + !> [ 0 | 0 0 I ] + !> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is + !> not the case, then X must be transposed and/or permuted. This can be + !> done in constant time using the TRANS and SIGNS options. See ZUNCSD + !> for details.) + !> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- + !> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are + !> represented implicitly by Householder vectors. + !> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! ==================================================================== + ! Parameters + real(dp), parameter :: realone = 1.0_dp + + + ! Local Scalars + logical(lk) :: colmajor, lquery + integer(ilp) :: i, lworkmin, lworkopt + real(dp) :: z1, z2, z3, z4 + ! Intrinsic Functions + intrinsic :: atan2,cos,max,min,sin + intrinsic :: cmplx,conjg + ! Executable Statements + ! test input arguments + info = 0 + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( .not. stdlib_lsame( signs, 'O' ) ) then + z1 = realone + z2 = realone + z3 = realone + z4 = realone + else + z1 = realone + z2 = -realone + z3 = realone + z4 = -realone + end if + lquery = lwork == -1 + if( m < 0 ) then + info = -3 + else if( p < 0 .or. p > m ) then + info = -4 + else if( q < 0 .or. q > p .or. q > m-p .or.q > m-q ) then + info = -5 + else if( colmajor .and. ldx11 < max( 1, p ) ) then + info = -7 + else if( .not.colmajor .and. ldx11 < max( 1, q ) ) then + info = -7 + else if( colmajor .and. ldx12 < max( 1, p ) ) then + info = -9 + else if( .not.colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -9 + else if( colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -11 + else if( .not.colmajor .and. ldx21 < max( 1, q ) ) then + info = -11 + else if( colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -13 + else if( .not.colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -13 + end if + ! compute workspace + if( info == 0 ) then + lworkopt = m - q + lworkmin = m - q + work(1) = lworkopt + if( lwork < lworkmin .and. .not. lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'XORBDB', -info ) + return + else if( lquery ) then + return + end if + ! handle column-major and row-major separately + if( colmajor ) then + ! reduce columns 1, ..., q of x11, x12, x21, and x22 + do i = 1, q + if( i == 1 ) then + call stdlib_zscal( p-i+1, cmplx( z1, 0.0_dp,KIND=dp), x11(i,i), 1 ) + else + call stdlib_zscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_dp,KIND=dp),x11(i,i), & + 1 ) + call stdlib_zaxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), x12(& + i,i-1), 1, x11(i,i), 1 ) + end if + if( i == 1 ) then + call stdlib_zscal( m-p-i+1, cmplx( z2, 0.0_dp,KIND=dp), x21(i,i), 1 ) + else + call stdlib_zscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_dp,KIND=dp),x21(i,i),& + 1 ) + call stdlib_zaxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), & + x22(i,i-1), 1, x21(i,i), 1 ) + end if + theta(i) = atan2( stdlib_dznrm2( m-p-i+1, x21(i,i), 1 ),stdlib_dznrm2( p-i+1, & + x11(i,i), 1 ) ) + if( p > i ) then + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + else if ( p == i ) then + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i,i), 1, taup1(i) ) + end if + x11(i,i) = cone + if ( m-p > i ) then + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1,taup2(i) ) + else if ( m-p == i ) then + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i,i), 1,taup2(i) ) + end if + x21(i,i) = cone + if ( q > i ) then + call stdlib_zlarf( 'L', p-i+1, q-i, x11(i,i), 1,conjg(taup1(i)), x11(i,i+1), & + ldx11, work ) + call stdlib_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1,conjg(taup2(i)), x21(i,i+1),& + ldx21, work ) + end if + if ( m-q+1 > i ) then + call stdlib_zlarf( 'L', p-i+1, m-q-i+1, x11(i,i), 1,conjg(taup1(i)), x12(i,i),& + ldx12, work ) + call stdlib_zlarf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1,conjg(taup2(i)), x22(i,& + i), ldx22, work ) + end if + if( i < q ) then + call stdlib_zscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_dp,KIND=dp),x11(i,i+& + 1), ldx11 ) + call stdlib_zaxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_dp,KIND=dp),x21(i,i+1)& + , ldx21, x11(i,i+1), ldx11 ) + end if + call stdlib_zscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_dp,KIND=dp),x12(i,i)& + , ldx12 ) + call stdlib_zaxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_dp,KIND=dp),x22(i,i),& + ldx22, x12(i,i), ldx12 ) + if( i < q )phi(i) = atan2( stdlib_dznrm2( q-i, x11(i,i+1), ldx11 ),stdlib_dznrm2(& + m-q-i+1, x12(i,i), ldx12 ) ) + if( i < q ) then + call stdlib_zlacgv( q-i, x11(i,i+1), ldx11 ) + if ( i == q-1 ) then + call stdlib_zlarfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) + else + call stdlib_zlarfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) + end if + x11(i,i+1) = cone + end if + if ( m-q+1 > i ) then + call stdlib_zlacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( m-q == i ) then + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + + end if + end if + x12(i,i) = cone + if( i < q ) then + call stdlib_zlarf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & + ldx11, work ) + call stdlib_zlarf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & + ldx21, work ) + end if + if ( p > i ) then + call stdlib_zlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if ( m-p > i ) then + call stdlib_zlarf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & + ldx22, work ) + end if + if( i < q )call stdlib_zlacgv( q-i, x11(i,i+1), ldx11 ) + call stdlib_zlacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_zscal( m-q-i+1, cmplx( -z1*z4, 0.0_dp,KIND=dp), x12(i,i),ldx12 ) + + call stdlib_zlacgv( m-q-i+1, x12(i,i), ldx12 ) + if ( i >= m-q ) then + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) + else + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) + end if + x12(i,i) = cone + if ( p > i ) then + call stdlib_zlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_zlarf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& + x22(q+1,i), ldx22, work ) + call stdlib_zlacgv( m-q-i+1, x12(i,i), ldx12 ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_zscal( m-p-q-i+1, cmplx( z2*z4, 0.0_dp,KIND=dp),x22(q+i,p+i), ldx22 ) + + call stdlib_zlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + call stdlib_zlarfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) + + x22(q+i,p+i) = cone + call stdlib_zlarf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& + q+i+1,p+i), ldx22, work ) + call stdlib_zlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) + end do + else + ! reduce columns 1, ..., q of x11, x12, x21, x22 + do i = 1, q + if( i == 1 ) then + call stdlib_zscal( p-i+1, cmplx( z1, 0.0_dp,KIND=dp), x11(i,i),ldx11 ) + else + call stdlib_zscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_dp,KIND=dp),x11(i,i), & + ldx11 ) + call stdlib_zaxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), x12(& + i-1,i), ldx12, x11(i,i), ldx11 ) + end if + if( i == 1 ) then + call stdlib_zscal( m-p-i+1, cmplx( z2, 0.0_dp,KIND=dp), x21(i,i),ldx21 ) + + else + call stdlib_zscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_dp,KIND=dp),x21(i,i),& + ldx21 ) + call stdlib_zaxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), & + x22(i-1,i), ldx22, x21(i,i), ldx21 ) + end if + theta(i) = atan2( stdlib_dznrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib_dznrm2( p-i+1,& + x11(i,i), ldx11 ) ) + call stdlib_zlacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_zlacgv( m-p-i+1, x21(i,i), ldx21 ) + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) + x11(i,i) = cone + if ( i == m-p ) then + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) + else + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) + end if + x21(i,i) = cone + call stdlib_zlarf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & + work ) + call stdlib_zlarf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & + ldx12, work ) + call stdlib_zlarf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & + ldx21, work ) + call stdlib_zlarf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & + ldx22, work ) + call stdlib_zlacgv( p-i+1, x11(i,i), ldx11 ) + call stdlib_zlacgv( m-p-i+1, x21(i,i), ldx21 ) + if( i < q ) then + call stdlib_zscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_dp,KIND=dp),x11(i+1,& + i), 1 ) + call stdlib_zaxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_dp,KIND=dp),x21(i+1,i)& + , 1, x11(i+1,i), 1 ) + end if + call stdlib_zscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_dp,KIND=dp),x12(i,i)& + , 1 ) + call stdlib_zaxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_dp,KIND=dp),x22(i,i),& + 1, x12(i,i), 1 ) + if( i < q )phi(i) = atan2( stdlib_dznrm2( q-i, x11(i+1,i), 1 ),stdlib_dznrm2( m-& + q-i+1, x12(i,i), 1 ) ) + if( i < q ) then + call stdlib_zlarfgp( q-i, x11(i+1,i), x11(i+2,i), 1, tauq1(i) ) + x11(i+1,i) = cone + end if + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if( i < q ) then + call stdlib_zlarf( 'L', q-i, p-i, x11(i+1,i), 1,conjg(tauq1(i)), x11(i+1,i+1),& + ldx11, work ) + call stdlib_zlarf( 'L', q-i, m-p-i, x11(i+1,i), 1,conjg(tauq1(i)), x21(i+1,i+& + 1), ldx21, work ) + end if + call stdlib_zlarf( 'L', m-q-i+1, p-i, x12(i,i), 1,conjg(tauq2(i)), x12(i,i+1), & + ldx12, work ) + if ( m-p > i ) then + call stdlib_zlarf( 'L', m-q-i+1, m-p-i, x12(i,i), 1,conjg(tauq2(i)), x22(i,i+& + 1), ldx22, work ) + end if + end do + ! reduce columns q + 1, ..., p of x12, x22 + do i = q + 1, p + call stdlib_zscal( m-q-i+1, cmplx( -z1*z4, 0.0_dp,KIND=dp), x12(i,i), 1 ) + call stdlib_zlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1, tauq2(i) ) + x12(i,i) = cone + if ( p > i ) then + call stdlib_zlarf( 'L', m-q-i+1, p-i, x12(i,i), 1,conjg(tauq2(i)), x12(i,i+1),& + ldx12, work ) + end if + if( m-p-q >= 1 )call stdlib_zlarf( 'L', m-q-i+1, m-p-q, x12(i,i), 1,conjg(tauq2(& + i)), x22(i,q+1), ldx22, work ) + end do + ! reduce columns p + 1, ..., m - q of x12, x22 + do i = 1, m - p - q + call stdlib_zscal( m-p-q-i+1, cmplx( z2*z4, 0.0_dp,KIND=dp),x22(p+i,q+i), 1 ) + + call stdlib_zlarfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1,tauq2(p+i) ) + + x22(p+i,q+i) = cone + if ( m-p-q /= i ) then + call stdlib_zlarf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1,conjg(tauq2(p+i)),& + x22(p+i,q+i+1), ldx22,work ) + end if + end do + end if + return + end subroutine stdlib_zunbdb + + !> ZUNBDB6: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then the zero vector is returned. + + pure subroutine stdlib_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: alphasq = 0.01_dp + real(dp), parameter :: realone = 1.0_dp + real(dp), parameter :: realzero = 0.0_dp + + + ! Local Scalars + integer(ilp) :: i + real(dp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB6', -info ) + return + end if + ! first, project x onto the orthogonal complement of q's column + ! space + scl1 = realzero + ssq1 = realone + call stdlib_zlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_zlassq( m2, x2, incx2, scl2, ssq2 ) + normsq1 = scl1**2*ssq1 + scl2**2*ssq2 + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_zgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_zgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_zgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_zgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_zlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_zlassq( m2, x2, incx2, scl2, ssq2 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if projection is sufficiently large in norm, then stop. + ! if projection is czero, then stop. + ! otherwise, project again. + if( normsq2 >= alphasq*normsq1 ) then + return + end if + if( normsq2 == czero ) then + return + end if + normsq1 = normsq2 + do i = 1, n + work(i) = czero + end do + if( m1 == 0 ) then + do i = 1, n + work(i) = czero + end do + else + call stdlib_zgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1 ) + end if + call stdlib_zgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1 ) + call stdlib_zgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1, cone, x1,incx1 ) + call stdlib_zgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1, cone, x2,incx2 ) + scl1 = realzero + ssq1 = realone + call stdlib_zlassq( m1, x1, incx1, scl1, ssq1 ) + scl2 = realzero + ssq2 = realone + call stdlib_zlassq( m1, x1, incx1, scl1, ssq1 ) + normsq2 = scl1**2*ssq1 + scl2**2*ssq2 + ! if second projection is sufficiently large in norm, then do + ! nothing more. alternatively, if it shrunk significantly, then + ! truncate it to czero. + if( normsq2 < alphasq*normsq1 ) then + do i = 1, m1 + x1(i) = czero + end do + do i = 1, m2 + x2(i) = czero + end do + end if + return + end subroutine stdlib_zunbdb6 + + !> ZUNG2L: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the last n columns of a product of k elementary + !> reflectors of order m + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. + + pure subroutine stdlib_zung2l( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda ZUNG2R: generates an m by n complex matrix Q with orthonormal columns, + !> which is defined as the first n columns of a product of k elementary + !> reflectors of order m + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. + + pure subroutine stdlib_zung2r( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda ZUNGL2: generates an m-by-n complex matrix Q with orthonormal rows, + !> which is defined as the first m rows of a product of k elementary + !> reflectors of order n + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. + + pure subroutine stdlib_zungl2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldak .and. j<=m )a( j, j ) = cone + end do + end if + do i = k, 1, -1 + ! apply h(i)**h to a(i:m,i:n) from the right + if( i ZUNGLQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the first M rows of a product of K elementary + !> reflectors of order N + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. + + pure subroutine stdlib_zunglq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZUNGLQ', ' ', m, n, k, -1 ) + lwkopt = max( 1, m )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=m ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_zlarft( 'FORWARD', 'ROWWISE', n-i+1, ib, a( i, i ),lda, tau( i ), & + work, ldwork ) + ! apply h**h to a(i+ib:m,i:n) from the right + call stdlib_zlarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'FORWARD','ROWWISE', m-i-& + ib+1, n-i+1, ib, a( i, i ),lda, work, ldwork, a( i+ib, i ), lda,work( ib+1 ), & + ldwork ) + end if + ! apply h**h to columns i:n of current block + call stdlib_zungl2( ib, n-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set columns 1:i-1 of current block to czero + do j = 1, i - 1 + do l = i, i + ib - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_zunglq + + !> ZUNGQL: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the last N columns of a product of K elementary + !> reflectors of order M + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. + + pure subroutine stdlib_zungql( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + if( n-k+i>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_zlarfb( 'LEFT', 'NO TRANSPOSE', 'BACKWARD','COLUMNWISE', m-k+i+ib-& + 1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + ! apply h to rows 1:m-k+i+ib-1 of current block + call stdlib_zung2l( m-k+i+ib-1, ib, ib, a( 1, n-k+i ), lda,tau( i ), work, iinfo & + ) + ! set rows m-k+i+ib:m of current block to czero + do j = n - k + i, n - k + i + ib - 1 + do l = m - k + i + ib, m + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_zungql + + !> ZUNGQR: generates an M-by-N complex matrix Q with orthonormal columns, + !> which is defined as the first N columns of a product of K elementary + !> reflectors of order M + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. + + pure subroutine stdlib_zungqr( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZUNGQR', ' ', m, n, k, -1 ) + lwkopt = max( 1, n )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( k<0 .or. k>n ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = ki + 1, 1, -nb + ib = min( nb, k-i+1 ) + if( i+ib<=n ) then + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i & + ), work, ldwork ) + ! apply h to a(i:m,i+ib:n) from the left + call stdlib_zlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-& + i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), & + ldwork ) + end if + ! apply h to rows i:m of current block + call stdlib_zung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo ) + ! set rows 1:i-1 of current block to czero + do j = i, i + ib - 1 + do l = 1, i - 1 + a( l, j ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_zungqr + + !> ZUNGR2: generates an m by n complex matrix Q with orthonormal rows, + !> which is defined as the last m rows of a product of k elementary + !> reflectors of order n + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. + + pure subroutine stdlib_zungr2( m, n, k, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, ii, j, l + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( ldan-m .and. j<=n-k )a( m-n+j, j ) = cone + end do + end if + do i = 1, k + ii = m - k + i + ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right + call stdlib_zlacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone + call stdlib_zlarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1 ), lda,conjg( tau( i ) ), a, lda,& + work ) + call stdlib_zscal( n-m+ii-1, -tau( i ), a( ii, 1 ), lda ) + call stdlib_zlacgv( n-m+ii-1, a( ii, 1 ), lda ) + a( ii, n-m+ii ) = cone - conjg( tau( i ) ) + ! set a(m-k+i,n-k+i+1:n) to czero + do l = n - m + ii + 1, n + a( ii, l ) = czero + end do + end do + return + end subroutine stdlib_zungr2 + + !> ZUNGRQ: generates an M-by-N complex matrix Q with orthonormal rows, + !> which is defined as the last M rows of a product of K elementary + !> reflectors of order N + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. + + pure subroutine stdlib_zungrq( m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( nm ) then + info = -3 + else if( lda1 .and. nb=nbmin .and. nb0 ) then + ! use blocked code + do i = k - kk + 1, k, nb + ib = min( nb, k-i+1 ) + ii = m - k + i + if( ii>1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_zlarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-& + 1, n-k+i+ib-1, ib, a( ii, 1 ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork ) + + end if + ! apply h**h to columns 1:n-k+i+ib-1 of current block + call stdlib_zungr2( ib, n-k+i+ib-1, ib, a( ii, 1 ), lda, tau( i ),work, iinfo ) + + ! set columns n-k+i+ib:n of current block to czero + do l = n - k + i + ib, n + do j = ii, ii + ib - 1 + a( j, l ) = czero + end do + end do + end do + end if + work( 1 ) = iws + return + end subroutine stdlib_zungrq + + !> ZUNGTSQR_ROW: generates an M-by-N complex matrix Q_out with + !> orthonormal columns from the output of ZLATSQR. These N orthonormal + !> columns are the first N columns of a product of complex unitary + !> matrices Q(k)_in of order M, which are returned by ZLATSQR in + !> a special format. + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> The input matrices Q(k)_in are stored in row and column blocks in A. + !> See the documentation of ZLATSQR for more details on the format of + !> Q(k)_in, where each Q(k)_in is represented by block Householder + !> transformations. This routine calls an auxiliary routine ZLARFB_GETT, + !> where the computation is performed on each individual block. The + !> algorithm first sweeps NB-sized column blocks from the right to left + !> starting in the bottom row block and continues to the top row block + !> (hence _ROW in the routine name). This sweep is in reverse order of + !> the order in which ZLATSQR generates the output blocks. + + pure subroutine stdlib_zungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, & + num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1 + ! Local Arrays + complex(dp) :: dummy(1,1) + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m=m, then the loop is never executed. + if ( mb=m, then we have only one row block of a of size m + ! and we work on the entire matrix a. + mb1 = min( mb, m ) + ! apply column blocks of h in the top row block from right to left. + ! kb is the column index of the current block reflector in + ! the matrices t and v. + do kb = kb_last, 1, -nblocal + ! determine the size of the current column block knb in + ! the matrices t and v. + knb = min( nblocal, n - kb + 1 ) + if( mb1-kb-knb+1==0 ) then + ! in stdlib_slarfb_gett parameters, when m=0, then the matrix b + ! does not exist, hence we need to pass a dummy array + ! reference dummy(1,1) to b with lddummy=1. + call stdlib_zlarfb_gett( 'N', 0, n-kb+1, knb,t( 1, kb ), ldt, a( kb, kb ), lda,& + dummy( 1, 1 ), 1, work, knb ) + else + call stdlib_zlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1, kb ), ldt, a( kb, & + kb ), lda,a( kb+knb, kb), lda, work, knb ) + end if + end do + work( 1 ) = cmplx( lworkopt,KIND=dp) + return + end subroutine stdlib_zungtsqr_row + + + pure subroutine stdlib_zunm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: m, n, n1, n2, ldq, ldc, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(in) :: q(ldq,*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q; + ! nw is the minimum dimension of work. + if( left ) then + nq = m + else + nq = n + end if + nw = nq + if( n1==0 .or. n2==0 ) nw = 1 + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( n1<0 .or. n1+n2/=nq ) then + info = -5 + else if( n2<0 ) then + info = -6 + else if( ldq ZUNM2L: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_zunm2l( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(dp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNM2R: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(dp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNML2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_zunml2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq + complex(dp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNMLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k)**H . . . H(2)**H H(1)**H + !> as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmlq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_zunml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_zlarft( 'FORWARD', 'ROWWISE', nq-i+1, ib, a( i, i ),lda, tau( i ), & + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_zlarfb( side, transt, 'FORWARD', 'ROWWISE', mi, ni, ib,a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmlq + + !> ZUNMQL: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(k) . . . H(2) H(1) + !> as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmql( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_zunm2l( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. notran ) .or.( .not.left .and. .not.notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'COLUMNWISE', nq-k+i+ib-1, ib,a( 1, i ), lda, & + tau( i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_zlarfb( side, trans, 'BACKWARD', 'COLUMNWISE', mi, ni,ib, a( 1, i ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmql + + !> ZUNMQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, & + ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + else + mi = m + ic = 1 + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i) h(i+1) . . . h(i+ib-1) + call stdlib_zlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),& + work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_zlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), & + lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmqr + + !> ZUNMR2: overwrites the general complex m-by-n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, mi, ni, nq + complex(dp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda ZUNMR3: overwrites the general complex m by n matrix C with + !> Q * C if SIDE = 'L' and TRANS = 'N', or + !> Q**H* C if SIDE = 'L' and TRANS = 'C', or + !> C * Q if SIDE = 'R' and TRANS = 'N', or + !> C * Q**H if SIDE = 'R' and TRANS = 'C', + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZTZRZF. Q is of order m if SIDE = 'L' and of order n + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, m, n + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), tau(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, notran + integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq + complex(dp) :: taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda ZUNMRQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1)**H H(2)**H . . . H(k)**H + !> as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, & + nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_zunmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo ) + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + else + mi = m + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1 ), lda, tau( & + i ), work( iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(1:m-k+i+ib-1,1:n) + mi = m - k + i + ib - 1 + else + ! h or h**h is applied to c(1:m,1:n-k+i+ib-1) + ni = n - k + i + ib - 1 + end if + ! apply h or h**h + call stdlib_zlarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1 ), & + lda, work( iwt ), ldt, c, ldc,work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmrq + + !> ZUNMRZ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of k + !> elementary reflectors + !> Q = H(1) H(2) . . . H(k) + !> as returned by ZTZRZF. Q is of order M if SIDE = 'L' and of order N + !> if SIDE = 'R'. + + pure subroutine stdlib_zunmrz( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, l, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + ! Local Scalars + logical(lk) :: left, lquery, notran + character :: transt + integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, & + nbmin, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>nq ) then + info = -5 + else if( l<0 .or. ( left .and. ( l>m ) ) .or.( .not.left .and. ( l>n ) ) ) then + info = -6 + else if( lda1 .and. nb=k ) then + ! use unblocked code + call stdlib_zunmr3( side, trans, m, n, k, l, a, lda, tau, c, ldc,work, iinfo ) + + else + ! use blocked code + iwt = 1 + nw*nb + if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then + i1 = 1 + i2 = k + i3 = nb + else + i1 = ( ( k-1 ) / nb )*nb + 1 + i2 = 1 + i3 = -nb + end if + if( left ) then + ni = n + jc = 1 + ja = m - l + 1 + else + mi = m + ic = 1 + ja = n - l + 1 + end if + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + do i = i1, i2, i3 + ib = min( nb, k-i+1 ) + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarzt( 'BACKWARD', 'ROWWISE', l, ib, a( i, ja ), lda,tau( i ), work(& + iwt ), ldt ) + if( left ) then + ! h or h**h is applied to c(i:m,1:n) + mi = m - i + 1 + ic = i + else + ! h or h**h is applied to c(1:m,i:n) + ni = n - i + 1 + jc = i + end if + ! apply h or h**h + call stdlib_zlarzb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, l, a( i, ja )& + , lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork ) + end do + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmrz + + !> ZBBCSD: computes the CS decomposition of a unitary matrix in + !> bidiagonal-block form, + !> [ B11 | B12 0 0 ] + !> [ 0 | 0 -I 0 ] + !> X = [----------------] + !> [ B21 | B22 0 0 ] + !> [ 0 | 0 0 I ] + !> [ C | -S 0 0 ] + !> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H + !> = [---------] [---------------] [---------] . + !> [ | U2 ] [ S | C 0 0 ] [ | V2 ] + !> [ 0 | 0 0 I ] + !> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger + !> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be + !> transposed and/or permuted. This can be done in constant time using + !> the TRANS and SIGNS options. See ZUNCSD for details.) + !> The bidiagonal matrices B11, B12, B21, and B22 are represented + !> implicitly by angles THETA(1:Q) and PHI(1:Q-1). + !> The unitary matrices U1, U2, V1T, and V2T are input/output. + !> The input matrices are pre- or post-multiplied by the appropriate + !> singular vector matrices. + + pure subroutine stdlib_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & + lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q + ! Array Arguments + real(dp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& + b22e(*), rwork(*) + real(dp), intent(inout) :: phi(*), theta(*) + complex(dp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) + + ! =================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 6 + real(dp), parameter :: hundred = 100.0_dp + real(dp), parameter :: meighth = -0.125_dp + real(dp), parameter :: piover2 = 1.57079632679489661923132169163975144210_dp + + + + + ! Local Scalars + logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & + wantu2, wantv1t, wantv2t + integer(ilp) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & + iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini + real(dp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & + sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 + ! Intrinsic Functions + intrinsic :: abs,atan2,cos,max,min,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lrwork == -1 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + if( m < 0 ) then + info = -6 + else if( p < 0 .or. p > m ) then + info = -7 + else if( q < 0 .or. q > m ) then + info = -8 + else if( q > p .or. q > m-p .or. q > m-q ) then + info = -8 + else if( wantu1 .and. ldu1 < p ) then + info = -12 + else if( wantu2 .and. ldu2 < m-p ) then + info = -14 + else if( wantv1t .and. ldv1t < q ) then + info = -16 + else if( wantv2t .and. ldv2t < m-q ) then + info = -18 + end if + ! quick return if q = 0 + if( info == 0 .and. q == 0 ) then + lrworkmin = 1 + rwork(1) = lrworkmin + return + end if + ! compute workspace + if( info == 0 ) then + iu1cs = 1 + iu1sn = iu1cs + q + iu2cs = iu1sn + q + iu2sn = iu2cs + q + iv1tcs = iu2sn + q + iv1tsn = iv1tcs + q + iv2tcs = iv1tsn + q + iv2tsn = iv2tcs + q + lrworkopt = iv2tsn + q - 1 + lrworkmin = lrworkopt + rwork(1) = lrworkopt + if( lrwork < lrworkmin .and. .not. lquery ) then + info = -28 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZBBCSD', -info ) + return + else if( lquery ) then + return + end if + ! get machine constants + eps = stdlib_dlamch( 'EPSILON' ) + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + tolmul = max( ten, min( hundred, eps**meighth ) ) + tol = tolmul*eps + thresh = max( tol, maxitr*q*q*unfl ) + ! test for negligible sines or cosines + do i = 1, q + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = 1, q-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! initial deflation + imax = q + do while( imax > 1 ) + if( phi(imax-1) /= zero ) then + exit + end if + imax = imax - 1 + end do + imin = imax - 1 + if ( imin > 1 ) then + do while( phi(imin-1) /= zero ) + imin = imin - 1 + if ( imin <= 1 ) exit + end do + end if + ! initialize iteration counter + maxit = maxitr*q*q + iter = 0 + ! begin main iteration loop + do while( imax > 1 ) + ! compute the matrix entries + b11d(imin) = cos( theta(imin) ) + b21d(imin) = -sin( theta(imin) ) + do i = imin, imax - 1 + b11e(i) = -sin( theta(i) ) * sin( phi(i) ) + b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) + b12d(i) = sin( theta(i) ) * cos( phi(i) ) + b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) + b21e(i) = -cos( theta(i) ) * sin( phi(i) ) + b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) + b22d(i) = cos( theta(i) ) * cos( phi(i) ) + b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) + end do + b12d(imax) = sin( theta(imax) ) + b22d(imax) = cos( theta(imax) ) + ! abort if not converging; otherwise, increment iter + if( iter > maxit ) then + info = 0 + do i = 1, q + if( phi(i) /= zero )info = info + 1 + end do + return + end if + iter = iter + imax - imin + ! compute shifts + thetamax = theta(imin) + thetamin = theta(imin) + do i = imin+1, imax + if( theta(i) > thetamax )thetamax = theta(i) + if( theta(i) < thetamin )thetamin = theta(i) + end do + if( thetamax > piover2 - thresh ) then + ! zero on diagonals of b11 and b22; induce deflation with a + ! zero shift + mu = zero + nu = one + else if( thetamin < thresh ) then + ! zero on diagonals of b12 and b22; induce deflation with a + ! zero shift + mu = one + nu = zero + else + ! compute shifts for b11 and b21 and use the lesser + call stdlib_dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) + + call stdlib_dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) + + if( sigma11 <= sigma21 ) then + mu = sigma11 + nu = sqrt( one - mu**2 ) + if( mu < thresh ) then + mu = zero + nu = one + end if + else + nu = sigma21 + mu = sqrt( 1.0_dp - nu**2 ) + if( nu < thresh ) then + mu = one + nu = zero + end if + end if + end if + ! rotate to produce bulges in b11 and b21 + if( mu <= nu ) then + call stdlib_dlartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + else + call stdlib_dlartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1) ) + end if + temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) + b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) + + b11d(imin) = temp + b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) + temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) + b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) + + b21d(imin) = temp + b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) + ! compute theta(imin) + theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),sqrt( b11d(imin)**2+& + b11bulge**2 ) ) + ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) + if( b11d(imin)**2+b11bulge**2 > thresh**2 ) then + call stdlib_dlartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& + 1), r ) + else if( mu <= nu ) then + call stdlib_dlartgs( b11e( imin ), b11d( imin + 1 ), mu,rwork(iu1cs+imin-1), & + rwork(iu1sn+imin-1) ) + else + call stdlib_dlartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1) ) + end if + if( b21d(imin)**2+b21bulge**2 > thresh**2 ) then + call stdlib_dlartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& + 1), r ) + else if( nu < mu ) then + call stdlib_dlartgs( b21e( imin ), b21d( imin + 1 ), nu,rwork(iu2cs+imin-1), & + rwork(iu2sn+imin-1) ) + else + call stdlib_dlartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& + imin-1) ) + end if + rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) + rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) + temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) + b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) + + b11e(imin) = temp + if( imax > imin+1 ) then + b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) + b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) + end if + temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) + b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) + b12d(imin) = temp + b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) + b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) + temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) + b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) + + b21e(imin) = temp + if( imax > imin+1 ) then + b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) + b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) + end if + temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) + b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) + b22d(imin) = temp + b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) + b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) + ! inner loop: chase bulges from b11(imin,imin+2), + ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to + ! bottom-right + do i = imin+1, imax-1 + ! compute phi(i-1) + x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) + x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge + y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) + y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge + phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11e(i-1)**2 + b11bulge**2 <= thresh**2 + restart21 = b21e(i-1)**2 + b21bulge**2 <= thresh**2 + restart12 = b12d(i-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), + ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart21 ) then + call stdlib_dlartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) + else if( .not. restart11 .and. restart21 ) then + call stdlib_dlartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( restart11 .and. .not. restart21 ) then + call stdlib_dlartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& + r ) + else if( mu <= nu ) then + call stdlib_dlartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + else + call stdlib_dlartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& + ) + end if + rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) + rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_dlartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& + 1-1), r ) + else if( nu < mu ) then + call stdlib_dlartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + else + call stdlib_dlartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& + i-1-1) ) + end if + temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) + b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) + b11d(i) = temp + b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) + temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) + b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) + b21d(i) = temp + b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) + temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) + b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) + b12e(i-1) = temp + b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) + b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) + temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) + b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) + b22e(i-1) = temp + b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) + b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) + ! compute theta(i) + x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) + x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge + y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) + y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge + theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) ) + ! determine if there are bulges to chase or if a new direct + ! summand has been reached + restart11 = b11d(i)**2 + b11bulge**2 <= thresh**2 + restart12 = b12e(i-1)**2 + b12bulge**2 <= thresh**2 + restart21 = b21d(i)**2 + b21bulge**2 <= thresh**2 + restart22 = b22e(i-1)**2 + b22bulge**2 <= thresh**2 + ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), + ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- + ! chasing by applying the original shift again. + if( .not. restart11 .and. .not. restart12 ) then + call stdlib_dlartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) + else if( .not. restart11 .and. restart12 ) then + call stdlib_dlartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) + + else if( restart11 .and. .not. restart12 ) then + call stdlib_dlartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & + r ) + else if( mu <= nu ) then + call stdlib_dlartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& + ) + else + call stdlib_dlartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) + + end if + if( .not. restart21 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) + else if( .not. restart21 .and. restart22 ) then + call stdlib_dlartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) + + else if( restart21 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & + r ) + else if( nu < mu ) then + call stdlib_dlartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& + ) + else + call stdlib_dlartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) + + end if + rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) + rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) + temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) + b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) + b11e(i) = temp + if( i < imax - 1 ) then + b11bulge = rwork(iu1sn+i-1)*b11e(i+1) + b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) + end if + temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) + b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) + b21e(i) = temp + if( i < imax - 1 ) then + b21bulge = rwork(iu2sn+i-1)*b21e(i+1) + b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) + end if + temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) + b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) + b12d(i) = temp + b12bulge = rwork(iu1sn+i-1)*b12d(i+1) + b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) + temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) + b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) + b22d(i) = temp + b22bulge = rwork(iu2sn+i-1)*b22d(i+1) + b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) + end do + ! compute phi(imax-1) + x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) + y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) + y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge + phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) ) + ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) + restart12 = b12d(imax-1)**2 + b12bulge**2 <= thresh**2 + restart22 = b22d(imax-1)**2 + b22bulge**2 <= thresh**2 + if( .not. restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) + + else if( .not. restart12 .and. restart22 ) then + call stdlib_dlartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( restart12 .and. .not. restart22 ) then + call stdlib_dlartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& + imax-1-1), r ) + else if( nu < mu ) then + call stdlib_dlartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + else + call stdlib_dlartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& + iv2tsn+imax-1-1) ) + end if + temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) + + b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) + + b12e(imax-1) = temp + temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) + + b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) + + b22e(imax-1) = temp + ! update singular vectors + if( wantu1 ) then + if( colmajor ) then + call stdlib_zlasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(1,imin), ldu1 ) + else + call stdlib_zlasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& + iu1sn+imin-1),u1(imin,1), ldu1 ) + end if + end if + if( wantu2 ) then + if( colmajor ) then + call stdlib_zlasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(1,imin), ldu2 ) + else + call stdlib_zlasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& + iu2sn+imin-1),u2(imin,1), ldu2 ) + end if + end if + if( wantv1t ) then + if( colmajor ) then + call stdlib_zlasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(imin,1), ldv1t ) + else + call stdlib_zlasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& + iv1tsn+imin-1),v1t(1,imin), ldv1t ) + end if + end if + if( wantv2t ) then + if( colmajor ) then + call stdlib_zlasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(imin,1), ldv2t ) + else + call stdlib_zlasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & + rwork(iv2tsn+imin-1),v2t(1,imin), ldv2t ) + end if + end if + ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) + if( b11e(imax-1)+b21e(imax-1) > 0 ) then + b11d(imax) = -b11d(imax) + b21d(imax) = -b21d(imax) + if( wantv1t ) then + if( colmajor ) then + call stdlib_zscal( q, cnegone, v1t(imax,1), ldv1t ) + else + call stdlib_zscal( q, cnegone, v1t(1,imax), 1 ) + end if + end if + end if + ! compute theta(imax) + x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) + y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) + theta(imax) = atan2( abs(y1), abs(x1) ) + ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), + ! and b22(imax,imax-1) + if( b11d(imax)+b12e(imax-1) < 0 ) then + b12d(imax) = -b12d(imax) + if( wantu1 ) then + if( colmajor ) then + call stdlib_zscal( p, cnegone, u1(1,imax), 1 ) + else + call stdlib_zscal( p, cnegone, u1(imax,1), ldu1 ) + end if + end if + end if + if( b21d(imax)+b22e(imax-1) > 0 ) then + b22d(imax) = -b22d(imax) + if( wantu2 ) then + if( colmajor ) then + call stdlib_zscal( m-p, cnegone, u2(1,imax), 1 ) + else + call stdlib_zscal( m-p, cnegone, u2(imax,1), ldu2 ) + end if + end if + end if + ! fix signs on b12(imax,imax) and b22(imax,imax) + if( b12d(imax)+b22d(imax) < 0 ) then + if( wantv2t ) then + if( colmajor ) then + call stdlib_zscal( m-q, cnegone, v2t(imax,1), ldv2t ) + else + call stdlib_zscal( m-q, cnegone, v2t(1,imax), 1 ) + end if + end if + end if + ! test for negligible sines or cosines + do i = imin, imax + if( theta(i) < thresh ) then + theta(i) = zero + else if( theta(i) > piover2-thresh ) then + theta(i) = piover2 + end if + end do + do i = imin, imax-1 + if( phi(i) < thresh ) then + phi(i) = zero + else if( phi(i) > piover2-thresh ) then + phi(i) = piover2 + end if + end do + ! deflate + if (imax > 1) then + do while( phi(imax-1) == zero ) + imax = imax - 1 + if (imax <= 1) exit + end do + end if + if( imin > imax - 1 )imin = imax - 1 + if (imin > 1) then + do while (phi(imin-1) /= zero) + imin = imin - 1 + if (imin <= 1) exit + end do + end if + ! repeat main iteration loop + end do + ! postprocessing: order theta from least to greatest + do i = 1, q + mini = i + thetamin = theta(i) + do j = i+1, q + if( theta(j) < thetamin ) then + mini = j + thetamin = theta(j) + end if + end do + if( mini /= i ) then + theta(mini) = theta(i) + theta(i) = thetamin + if( colmajor ) then + if( wantu1 )call stdlib_zswap( p, u1(1,i), 1, u1(1,mini), 1 ) + if( wantu2 )call stdlib_zswap( m-p, u2(1,i), 1, u2(1,mini), 1 ) + if( wantv1t )call stdlib_zswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t ) + + if( wantv2t )call stdlib_zswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),ldv2t ) + + else + if( wantu1 )call stdlib_zswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 ) + if( wantu2 )call stdlib_zswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 ) + if( wantv1t )call stdlib_zswap( q, v1t(1,i), 1, v1t(1,mini), 1 ) + if( wantv2t )call stdlib_zswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 ) + end if + end if + end do + return + end subroutine stdlib_zbbcsd + + !> ZBDSQR: computes the singular values and, optionally, the right and/or + !> left singular vectors from the singular value decomposition (SVD) of + !> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + !> zero-shift QR algorithm. The SVD of B has the form + !> B = Q * S * P**H + !> where S is the diagonal matrix of singular values, Q is an orthogonal + !> matrix of left singular vectors, and P is an orthogonal matrix of + !> right singular vectors. If left singular vectors are requested, this + !> subroutine actually returns U*Q instead of Q, and, if right singular + !> vectors are requested, this subroutine returns P**H*VT instead of + !> P**H, for given complex input matrices U and VT. When U and VT are + !> the unitary matrices that reduce a general matrix A to bidiagonal + !> form: A = U*B*VT, as computed by ZGEBRD, then + !> A = (U*Q) * S * (P**H*VT) + !> is the SVD of A. Optionally, the subroutine may also compute Q**H*C + !> for a given complex input matrix C. + !> See "Computing Small Singular Values of Bidiagonal Matrices With + !> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + !> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + !> no. 5, pp. 873-912, Sept 1990) and + !> "Accurate singular values and differential qd algorithms," by + !> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + !> Department, University of California at Berkeley, July 1992 + !> for a detailed description of the algorithm. + + pure subroutine stdlib_zbdsqr( uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u,ldu, c, ldc, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: c(ldc,*), u(ldu,*), vt(ldvt,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: hndrth = 0.01_dp + real(dp), parameter :: hndrd = 100.0_dp + real(dp), parameter :: meigth = -0.125_dp + integer(ilp), parameter :: maxitr = 6 + + + + + + + + + ! Local Scalars + logical(lk) :: lower, rotate + integer(ilp) :: i, idir, isub, iter, j, ll, lll, m, maxit, nm1, nm12, nm13, oldll, & + oldm + real(dp) :: abse, abss, cosl, cosr, cs, eps, f, g, h, mu, oldcs, oldsn, r, shift, & + sigmn, sigmx, sinl, sinr, sll, smax, smin, sminl, sminoa, sn, thresh, tol, tolmul, & + unfl + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sign,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lower = stdlib_lsame( uplo, 'L' ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.lower ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ncvt<0 ) then + info = -3 + else if( nru<0 ) then + info = -4 + else if( ncc<0 ) then + info = -5 + else if( ( ncvt==0 .and. ldvt<1 ) .or.( ncvt>0 .and. ldvt0 .and. ldc0 ) .or. ( nru>0 ) .or. ( ncc>0 ) + ! if no singular vectors desired, use qd algorithm + if( .not.rotate ) then + call stdlib_dlasq1( n, d, e, rwork, info ) + ! if info equals 2, dqds didn't finish, try to finish + if( info /= 2 ) return + info = 0 + end if + nm1 = n - 1 + nm12 = nm1 + nm1 + nm13 = nm12 + nm1 + idir = 0 + ! get machine constants + eps = stdlib_dlamch( 'EPSILON' ) + unfl = stdlib_dlamch( 'SAFE MINIMUM' ) + ! if matrix lower bidiagonal, rotate to be upper bidiagonal + ! by applying givens rotations on the left + if( lower ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + rwork( i ) = cs + rwork( nm1+i ) = sn + end do + ! update singular vectors if desired + if( nru>0 )call stdlib_zlasr( 'R', 'V', 'F', nru, n, rwork( 1 ), rwork( n ),u, ldu ) + + if( ncc>0 )call stdlib_zlasr( 'L', 'V', 'F', n, ncc, rwork( 1 ), rwork( n ),c, ldc ) + + end if + ! compute singular values to relative accuracy tol + ! (by setting tol to be negative, algorithm will compute + ! singular values to absolute accuracy abs(tol)*norm(input matrix)) + tolmul = max( ten, min( hndrd, eps**meigth ) ) + tol = tolmul*eps + ! compute approximate maximum, minimum singular values + smax = zero + do i = 1, n + smax = max( smax, abs( d( i ) ) ) + end do + do i = 1, n - 1 + smax = max( smax, abs( e( i ) ) ) + end do + sminl = zero + if( tol>=zero ) then + ! relative accuracy desired + sminoa = abs( d( 1 ) ) + if( sminoa==zero )go to 50 + mu = sminoa + do i = 2, n + mu = abs( d( i ) )*( mu / ( mu+abs( e( i-1 ) ) ) ) + sminoa = min( sminoa, mu ) + if( sminoa==zero )go to 50 + end do + 50 continue + sminoa = sminoa / sqrt( real( n,KIND=dp) ) + thresh = max( tol*sminoa, maxitr*n*n*unfl ) + else + ! absolute accuracy desired + thresh = max( abs( tol )*smax, maxitr*n*n*unfl ) + end if + ! prepare for main iteration loop for the singular values + ! (maxit is the maximum number of passes through the inner + ! loop permitted before nonconvergence signalled.) + maxit = maxitr*n*n + iter = 0 + oldll = -1 + oldm = -1 + ! m points to last element of unconverged part of matrix + m = n + ! begin main iteration loop + 60 continue + ! check for convergence or exceeding iteration count + if( m<=1 )go to 160 + if( iter>maxit )go to 200 + ! find diagonal block of matrix to work on + if( tol0 )call stdlib_zdrot( ncvt, vt( m-1, 1 ), ldvt, vt( m, 1 ), ldvt,cosr, & + sinr ) + if( nru>0 )call stdlib_zdrot( nru, u( 1, m-1 ), 1, u( 1, m ), 1, cosl, sinl ) + + if( ncc>0 )call stdlib_zdrot( ncc, c( m-1, 1 ), ldc, c( m, 1 ), ldc, cosl,sinl ) + + m = m - 2 + go to 60 + end if + ! if working on new submatrix, choose shift direction + ! (from larger end diagonal element towards smaller) + if( ll>oldm .or. m=abs( d( m ) ) ) then + ! chase bulge from top (big end) to bottom (small end) + idir = 1 + else + ! chase bulge from bottom (big end) to top (small end) + idir = 2 + end if + end if + ! apply convergence tests + if( idir==1 ) then + ! run convergence test in forward direction + ! first apply standard test to bottom of matrix + if( abs( e( m-1 ) )<=abs( tol )*abs( d( m ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion forward + mu = abs( d( ll ) ) + sminl = mu + do lll = ll, m - 1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll+1 ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + else + ! run convergence test in backward direction + ! first apply standard test to top of matrix + if( abs( e( ll ) )<=abs( tol )*abs( d( ll ) ) .or.( tol=zero ) then + ! if relative accuracy desired, + ! apply convergence criterion backward + mu = abs( d( m ) ) + sminl = mu + do lll = m - 1, ll, -1 + if( abs( e( lll ) )<=tol*mu ) then + e( lll ) = zero + go to 60 + end if + mu = abs( d( lll ) )*( mu / ( mu+abs( e( lll ) ) ) ) + sminl = min( sminl, mu ) + end do + end if + end if + oldll = ll + oldm = m + ! compute shift. first, test if shifting would ruin relative + ! accuracy, and if so set the shift to zero. + if( tol>=zero .and. n*tol*( sminl / smax )<=max( eps, hndrth*tol ) ) then + ! use a zero shift to avoid loss of relative accuracy + shift = zero + else + ! compute the shift from 2-by-2 block at end of matrix + if( idir==1 ) then + sll = abs( d( ll ) ) + call stdlib_dlas2( d( m-1 ), e( m-1 ), d( m ), shift, r ) + else + sll = abs( d( m ) ) + call stdlib_dlas2( d( ll ), e( ll ), d( ll+1 ), shift, r ) + end if + ! test if shift negligible, and if so set to zero + if( sll>zero ) then + if( ( shift / sll )**2ll )e( i-1 ) = oldsn*r + call stdlib_dlartg( oldcs*r, d( i+1 )*sn, oldcs, oldsn, d( i ) ) + rwork( i-ll+1 ) = cs + rwork( i-ll+1+nm1 ) = sn + rwork( i-ll+1+nm12 ) = oldcs + rwork( i-ll+1+nm13 ) = oldsn + end do + h = d( m )*cs + d( m ) = h*oldcs + e( m-1 ) = h*oldsn + ! update singular vectors + if( ncvt>0 )call stdlib_zlasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_zlasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_zlasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + cs = one + oldcs = one + do i = m, ll + 1, -1 + call stdlib_dlartg( d( i )*cs, e( i-1 ), cs, sn, r ) + if( i0 )call stdlib_zlasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_zlasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_zlasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + end if + else + ! use nonzero shift + if( idir==1 ) then + ! chase bulge from top to bottom + ! save cosines and sines for later singular vector updates + f = ( abs( d( ll ) )-shift )*( sign( one, d( ll ) )+shift / d( ll ) ) + g = e( ll ) + do i = ll, m - 1 + call stdlib_dlartg( f, g, cosr, sinr, r ) + if( i>ll )e( i-1 ) = r + f = cosr*d( i ) + sinr*e( i ) + e( i ) = cosr*e( i ) - sinr*d( i ) + g = sinr*d( i+1 ) + d( i+1 ) = cosr*d( i+1 ) + call stdlib_dlartg( f, g, cosl, sinl, r ) + d( i ) = r + f = cosl*e( i ) + sinl*d( i+1 ) + d( i+1 ) = cosl*d( i+1 ) - sinl*e( i ) + if( i0 )call stdlib_zlasr( 'L', 'V', 'F', m-ll+1, ncvt, rwork( 1 ),rwork( n )& + , vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_zlasr( 'R', 'V', 'F', nru, m-ll+1, rwork( nm12+1 ),rwork( & + nm13+1 ), u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_zlasr( 'L', 'V', 'F', m-ll+1, ncc, rwork( nm12+1 ),rwork( & + nm13+1 ), c( ll, 1 ), ldc ) + ! test convergence + if( abs( e( m-1 ) )<=thresh )e( m-1 ) = zero + else + ! chase bulge from bottom to top + ! save cosines and sines for later singular vector updates + f = ( abs( d( m ) )-shift )*( sign( one, d( m ) )+shift /d( m ) ) + g = e( m-1 ) + do i = m, ll + 1, -1 + call stdlib_dlartg( f, g, cosr, sinr, r ) + if( ill+1 ) then + g = sinl*e( i-2 ) + e( i-2 ) = cosl*e( i-2 ) + end if + rwork( i-ll ) = cosr + rwork( i-ll+nm1 ) = -sinr + rwork( i-ll+nm12 ) = cosl + rwork( i-ll+nm13 ) = -sinl + end do + e( ll ) = f + ! test convergence + if( abs( e( ll ) )<=thresh )e( ll ) = zero + ! update singular vectors if desired + if( ncvt>0 )call stdlib_zlasr( 'L', 'V', 'B', m-ll+1, ncvt, rwork( nm12+1 ),& + rwork( nm13+1 ), vt( ll, 1 ), ldvt ) + if( nru>0 )call stdlib_zlasr( 'R', 'V', 'B', nru, m-ll+1, rwork( 1 ),rwork( n ), & + u( 1, ll ), ldu ) + if( ncc>0 )call stdlib_zlasr( 'L', 'V', 'B', m-ll+1, ncc, rwork( 1 ),rwork( n ), & + c( ll, 1 ), ldc ) + end if + end if + ! qr iteration finished, go back and check convergence + go to 60 + ! all singular values converged, so make them positive + 160 continue + do i = 1, n + if( d( i )0 )call stdlib_zdscal( ncvt, negone, vt( i, 1 ), ldvt ) + end if + end do + ! sort the singular values into decreasing order (insertion sort on + ! singular values, but only one transposition per singular vector) + do i = 1, n - 1 + ! scan for smallest d(i) + isub = 1 + smin = d( 1 ) + do j = 2, n + 1 - i + if( d( j )<=smin ) then + isub = j + smin = d( j ) + end if + end do + if( isub/=n+1-i ) then + ! swap singular values and vectors + d( isub ) = d( n+1-i ) + d( n+1-i ) = smin + if( ncvt>0 )call stdlib_zswap( ncvt, vt( isub, 1 ), ldvt, vt( n+1-i, 1 ),ldvt ) + + if( nru>0 )call stdlib_zswap( nru, u( 1, isub ), 1, u( 1, n+1-i ), 1 ) + if( ncc>0 )call stdlib_zswap( ncc, c( isub, 1 ), ldc, c( n+1-i, 1 ), ldc ) + + end if + end do + go to 220 + ! maximum number of iterations exceeded, failure to converge + 200 continue + info = 0 + do i = 1, n - 1 + if( e( i )/=zero )info = info + 1 + end do + 220 continue + return + end subroutine stdlib_zbdsqr + + !> ZGBCON: estimates the reciprocal of the condition number of a complex + !> general band matrix A, in either the 1-norm or the infinity-norm, + !> using the LU factorization computed by ZGBTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_zgbcon( norm, n, kl, ku, ab, ldab, ipiv, anorm, rcond,work, rwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, onenrm + character :: normin + integer(ilp) :: ix, j, jp, kase, kase1, kd, lm + real(dp) :: ainvnm, scale, smlnum + complex(dp) :: t, zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( anorm0 + kase = 0 + 10 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(l). + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + jp = ipiv( j ) + t = work( jp ) + if( jp/=j ) then + work( jp ) = work( j ) + work( j ) = t + end if + call stdlib_zaxpy( lm, -t, ab( kd+1, j ), 1, work( j+1 ), 1 ) + end do + end if + ! multiply by inv(u). + call stdlib_zlatbs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', normin, n,kl+ku, ab, & + ldab, work, scale, rwork, info ) + else + ! multiply by inv(u**h). + call stdlib_zlatbs( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',normin, n, kl+ku, & + ab, ldab, work, scale, rwork,info ) + ! multiply by inv(l**h). + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + work( j ) = work( j ) - stdlib_zdotc( lm, ab( kd+1, j ), 1,work( j+1 ), 1 ) + + jp = ipiv( j ) + if( jp/=j ) then + t = work( jp ) + work( jp ) = work( j ) + work( j ) = t + end if + end do + end if + end if + ! divide x by 1/scale if doing so will not cause overflow. + normin = 'Y' + if( scale/=one ) then + ix = stdlib_izamax( n, work, 1 ) + if( scale ZGBTRF: computes an LU factorization of a complex m-by-n band matrix A + !> using partial pivoting with row interchanges. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zgbtrf( m, n, kl, ku, ab, ldab, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldwork = nbmax+1 + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp, ju, k2, km, kv, nb, & + nw + complex(dp) :: temp + ! Local Arrays + complex(dp) :: work13(ldwork,nbmax), work31(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! kv is the number of superdiagonals in the factor u, allowing for + ! fill-in + kv = ku + kl + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( ldabkl ) then + ! use unblocked code + call stdlib_zgbtf2( m, n, kl, ku, ab, ldab, ipiv, info ) + else + ! use blocked code + ! czero the superdiagonal elements of the work array work13 + do j = 1, nb + do i = 1, j - 1 + work13( i, j ) = czero + end do + end do + ! czero the subdiagonal elements of the work array work31 + do j = 1, nb + do i = j + 1, nb + work31( i, j ) = czero + end do + end do + ! gaussian elimination with partial pivoting + ! set fill-in elements in columns ku+2 to kv to czero + do j = ku + 2, min( kv, n ) + do i = kv - j + 2, kl + ab( i, j ) = czero + end do + end do + ! ju is the index of the last column affected by the current + ! stage of the factorization + ju = 1 + loop_180: do j = 1, min( m, n ), nb + jb = min( nb, min( m, n )-j+1 ) + ! the active part of the matrix is partitioned + ! a11 a12 a13 + ! a21 a22 a23 + ! a31 a32 a33 + ! here a11, a21 and a31 denote the current block of jb columns + ! which is about to be factorized. the number of rows in the + ! partitioning are jb, i2, i3 respectively, and the numbers + ! of columns are jb, j2, j3. the superdiagonal elements of a13 + ! and the subdiagonal elements of a31 lie outside the band. + i2 = min( kl-jb, m-j-jb+1 ) + i3 = min( jb, m-j-kl+1 ) + ! j2 and j3 are computed after ju has been updated. + ! factorize the current block of jb columns + loop_80: do jj = j, j + jb - 1 + ! set fill-in elements in column jj+kv to czero + if( jj+kv<=n ) then + do i = 1, kl + ab( i, jj+kv ) = czero + end do + end if + ! find pivot and test for singularity. km is the number of + ! subdiagonal elements in the current column. + km = min( kl, m-jj ) + jp = stdlib_izamax( km+1, ab( kv+1, jj ), 1 ) + ipiv( jj ) = jp + jj - j + if( ab( kv+jp, jj )/=czero ) then + ju = max( ju, min( jj+ku+jp-1, n ) ) + if( jp/=1 ) then + ! apply interchange to columns j to j+jb-1 + if( jp+jj-1jj )call stdlib_zgeru( km, jm-jj, -cone, ab( kv+2, jj ), 1,ab( kv, & + jj+1 ), ldab-1,ab( kv+1, jj+1 ), ldab-1 ) + else + ! if pivot is czero, set info to the index of the pivot + ! unless a czero pivot has already been found. + if( info==0 )info = jj + end if + ! copy current column of a31 into the work array work31 + nw = min( jj-j+1, i3 ) + if( nw>0 )call stdlib_zcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,work31( 1, jj-j+1 )& + , 1 ) + end do loop_80 + if( j+jb<=n ) then + ! apply the row interchanges to the other blocks. + j2 = min( ju-j+1, kv ) - jb + j3 = max( 0, ju-j-kv+1 ) + ! use stdlib_zlaswp to apply the row interchanges to a12, a22, and + ! a32. + call stdlib_zlaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,ipiv( j ), 1 ) + + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + ! apply the row interchanges to a13, a23, and a33 + ! columnwise. + k2 = j - 1 + jb + j2 + do i = 1, j3 + jj = k2 + i + do ii = j + i - 1, j + jb - 1 + ip = ipiv( ii ) + if( ip/=ii ) then + temp = ab( kv+1+ii-jj, jj ) + ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj ) + ab( kv+1+ip-jj, jj ) = temp + end if + end do + end do + ! update the relevant part of the trailing submatrix + if( j2>0 ) then + ! update a12 + call stdlib_ztrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j2, cone, & + ab( kv+1, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1 ) + if( i2>0 ) then + ! update a22 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j2,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+1, j+jb )& + , ldab-1 ) + end if + if( i3>0 ) then + ! update a32 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j2,jb, -cone, & + work31, ldwork,ab( kv+1-jb, j+jb ), ldab-1, cone,ab( kv+kl+1-jb, j+jb ),& + ldab-1 ) + end if + end if + if( j3>0 ) then + ! copy the lower triangle of a13 into the work array + ! work13 + do jj = 1, j3 + do ii = jj, jb + work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 ) + end do + end do + ! update a13 in the work array + call stdlib_ztrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT',jb, j3, cone, & + ab( kv+1, j ), ldab-1,work13, ldwork ) + if( i2>0 ) then + ! update a23 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i2, j3,jb, -cone, ab(& + kv+1+jb, j ), ldab-1,work13, ldwork, cone, ab( 1+jb, j+kv ),ldab-1 ) + + end if + if( i3>0 ) then + ! update a33 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', i3, j3,jb, -cone, & + work31, ldwork, work13,ldwork, cone, ab( 1+kl, j+kv ), ldab-1 ) + end if + ! copy the lower triangle of a13 back into place + do jj = 1, j3 + do ii = jj, jb + ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj ) + end do + end do + end if + else + ! adjust the pivot indices. + do i = j, j + jb - 1 + ipiv( i ) = ipiv( i ) + j - 1 + end do + end if + ! partially undo the interchanges in the current block to + ! restore the upper triangular form of a31 and copy the upper + ! triangle of a31 back into place + do jj = j + jb - 1, j, -1 + jp = ipiv( jj ) - jj + 1 + if( jp/=1 ) then + ! apply interchange to columns j to jj-1 + if( jp+jj-10 )call stdlib_zcopy( nw, work31( 1, jj-j+1 ), 1,ab( kv+kl+1-jj+j, jj )& + , 1 ) + end do + end do loop_180 + end if + return + end subroutine stdlib_zgbtrf + + !> ZGBTRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general band matrix A using the LU factorization computed + !> by ZGBTRF. + + pure subroutine stdlib_zgbtrs( trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lnoti, notran + integer(ilp) :: i, j, kd, l, lm + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab<( 2*kl+ku+1 ) ) then + info = -7 + else if( ldb0 + if( notran ) then + ! solve a*x = b. + ! solve l*x = b, overwriting b with x. + ! l is represented as a product of permutations and unit lower + ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1), + ! where each transformation l(i) is a rank-cone modification of + ! the identity matrix. + if( lnoti ) then + do j = 1, n - 1 + lm = min( kl, n-j ) + l = ipiv( j ) + if( l/=j )call stdlib_zswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + call stdlib_zgeru( lm, nrhs, -cone, ab( kd+1, j ), 1, b( j, 1 ),ldb, b( j+1, & + 1 ), ldb ) + end do + end if + do i = 1, nrhs + ! solve u*x = b, overwriting b with x. + call stdlib_ztbsv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, kl+ku,ab, ldab, b( 1, & + i ), 1 ) + end do + else if( stdlib_lsame( trans, 'T' ) ) then + ! solve a**t * x = b. + do i = 1, nrhs + ! solve u**t * x = b, overwriting b with x. + call stdlib_ztbsv( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, kl+ku, ab,ldab, b( 1, i )& + , 1 ) + end do + ! solve l**t * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_zgemv( 'TRANSPOSE', lm, nrhs, -cone, b( j+1, 1 ),ldb, ab( kd+1, j & + ), 1, cone, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_zswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + else + ! solve a**h * x = b. + do i = 1, nrhs + ! solve u**h * x = b, overwriting b with x. + call stdlib_ztbsv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT', n,kl+ku, ab, ldab,& + b( 1, i ), 1 ) + end do + ! solve l**h * x = b, overwriting b with x. + if( lnoti ) then + do j = n - 1, 1, -1 + lm = min( kl, n-j ) + call stdlib_zlacgv( nrhs, b( j, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', lm, nrhs, -cone,b( j+1, 1 ), ldb, & + ab( kd+1, j ), 1, cone,b( j, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( j, 1 ), ldb ) + l = ipiv( j ) + if( l/=j )call stdlib_zswap( nrhs, b( l, 1 ), ldb, b( j, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_zgbtrs + + !> ZGEBD2: reduces a complex general m by n matrix A to upper or lower + !> real bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_zgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! reduce to upper bidiagonal form + do i = 1, n + ! generate elementary reflector h(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_zlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=dp) + a( i, i ) = cone + ! apply h(i)**h to a(i:m,i+1:n) from the left + if( i ZGECON: estimates the reciprocal of the condition number of a general + !> complex matrix A, in either the 1-norm or the infinity-norm, using + !> the LU factorization computed by ZGETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + pure subroutine stdlib_zgecon( norm, n, a, lda, anorm, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, scale, sl, smlnum, su + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEHD2: reduces a complex general matrix A to upper Hessenberg form H + !> by a unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_zgehd2( n, ilo, ihi, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda ZGELQ2: computes an LQ factorization of a complex m-by-n matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a n-by-n orthogonal matrix; + !> L is a lower-triangular m-by-m matrix; + !> 0 is a m-by-(n-m) zero matrix, if m < n. + + pure subroutine stdlib_zgelq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGELQF: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_zgelqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 ) + lwkopt = m*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb ZGELQT3: recursively computes a LQ factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_zgelqt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, m1, m2, iinfo + ! Executable Statements + info = 0 + if( m < 0 ) then + info = -1 + else if( n < m ) then + info = -2 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, m ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGELQT3', -info ) + return + end if + if( m==1 ) then + ! compute householder transform when m=1 + call stdlib_zlarfg( n, a(1,1), a( 1, min( 2, n ) ), lda, t(1,1) ) + t(1,1)=conjg(t(1,1)) + else + ! otherwise, split a into blocks... + m1 = m/2 + m2 = m-m1 + i1 = min( m1+1, m ) + j1 = min( m+1, n ) + ! compute a(1:m1,1:n) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_zgelqt3( m1, n, a, lda, t, ldt, iinfo ) + ! compute a(j1:m,1:n) = a(j1:m,1:n) q1^h [workspace: t(1:n1,j1:n)] + do i=1,m2 + do j=1,m1 + t( i+m1, j ) = a( i+m1, j ) + end do + end do + call stdlib_ztrmm( 'R', 'U', 'C', 'U', m2, m1, cone,a, lda, t( i1, 1 ), ldt ) + + call stdlib_zgemm( 'N', 'C', m2, m1, n-m1, cone, a( i1, i1 ), lda,a( 1, i1 ), lda, & + cone, t( i1, 1 ), ldt) + call stdlib_ztrmm( 'R', 'U', 'N', 'N', m2, m1, cone,t, ldt, t( i1, 1 ), ldt ) + + call stdlib_zgemm( 'N', 'N', m2, n-m1, m1, -cone, t( i1, 1 ), ldt,a( 1, i1 ), lda, & + cone, a( i1, i1 ), lda ) + call stdlib_ztrmm( 'R', 'U', 'N', 'U', m2, m1 , cone,a, lda, t( i1, 1 ), ldt ) + + do i=1,m2 + do j=1,m1 + a( i+m1, j ) = a( i+m1, j ) - t( i+m1, j ) + t( i+m1, j )= czero + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_zgelqt3( m2, n-m1, a( i1, i1 ), lda,t( i1, i1 ), ldt, iinfo ) + ! compute t3 = t(j1:n1,1:n) = -t1 y1^h y2 t2 + do i=1,m2 + do j=1,m1 + t( j, i+m1 ) = (a( j, i+m1 )) + end do + end do + call stdlib_ztrmm( 'R', 'U', 'C', 'U', m1, m2, cone,a( i1, i1 ), lda, t( 1, i1 ), & + ldt ) + call stdlib_zgemm( 'N', 'C', m1, m2, n-m, cone, a( 1, j1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, i1 ), ldt ) + call stdlib_ztrmm( 'L', 'U', 'N', 'N', m1, m2, -cone, t, ldt,t( 1, i1 ), ldt ) + + call stdlib_ztrmm( 'R', 'U', 'N', 'N', m1, m2, cone,t( i1, i1 ), ldt, t( 1, i1 ), & + ldt ) + ! y = (y1,y2); l = [ l1 0 ]; t = [t1 t3] + ! [ a(1:n1,j1:n) l2 ] [ 0 t2] + end if + return + end subroutine stdlib_zgelqt3 + + !> ZGEMLQT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex unitary matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by ZGELQT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_zgemlqt( side, trans, m, n, k, mb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, mb, ldt + ! Array Arguments + complex(dp), intent(in) :: v(ldv,*), t(ldt,*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( mb<1 .or. (mb>k .and. k>0)) then + info = -6 + else if( ldv ZGEMQRT: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q C C Q + !> TRANS = 'C': Q**H C C Q**H + !> where Q is a complex orthogonal matrix defined as the product of K + !> elementary reflectors: + !> Q = H(1) H(2) . . . H(K) = I - V T V**H + !> generated using the compact WY representation as returned by ZGEQRT. + !> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. + + pure subroutine stdlib_zgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, ldc, m, n, nb, ldt + ! Array Arguments + complex(dp), intent(in) :: v(ldv,*), t(ldt,*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, ldwork, kf, q + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if( left ) then + ldwork = max( 1, n ) + q = m + else if ( right ) then + ldwork = max( 1, m ) + q = n + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>q ) then + info = -5 + else if( nb<1 .or. (nb>k .and. k>0)) then + info = -6 + else if( ldv ZGEQL2: computes a QL factorization of a complex m by n matrix A: + !> A = Q * L. + + pure subroutine stdlib_zgeql2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQLF: computes a QL factorization of a complex M-by-N matrix A: + !> A = Q * L. + + pure subroutine stdlib_zgeqlf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'COLUMNWISE', m-k+i+ib-1, ib,a( 1, n-k+i ), & + lda, tau( i ), work, ldwork ) + ! apply h**h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left + call stdlib_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'BACKWARD','COLUMNWISE', m-& + k+i+ib-1, n-k+i-1, ib,a( 1, n-k+i ), lda, work, ldwork, a, lda,work( ib+1 ), & + ldwork ) + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_zgeql2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_zgeqlf + + !> ZGEQR2: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + pure subroutine stdlib_zgeqr2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQR2P: computes a QR factorization of a complex m-by-n matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a m-by-m orthogonal matrix; + !> R is an upper-triangular n-by-n matrix with nonnegative diagonal + !> entries; + !> 0 is a (m-n)-by-n zero matrix, if m > n. + + subroutine stdlib_zgeqr2p( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: conjg,max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGEQRF: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_zgeqrf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + k = min( m, n ) + info = 0 + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb ZGEQR2P computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix with nonnegative diagonal + !> entries; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + subroutine stdlib_zgeqrfp( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=nbmin .and. nb ZGEQRT2: computes a QR factorization of a complex M-by-N matrix A, + !> using the compact WY representation of Q. + + pure subroutine stdlib_zgeqrt2( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: aii, alpha + ! Executable Statements + ! test the input arguments + info = 0 + if( n<0 ) then + info = -2 + else if( m t(i,1) + call stdlib_zlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1,t( i, 1 ) ) + if( i ZGEQRT3: recursively computes a QR factorization of a complex M-by-N + !> matrix A, using the compact WY representation of Q. + !> Based on the algorithm of Elmroth and Gustavson, + !> IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + pure recursive subroutine stdlib_zgeqrt3( m, n, a, lda, t, ldt, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, ldt + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, j, j1, n1, n2, iinfo + ! Executable Statements + info = 0 + if( n < 0 ) then + info = -2 + else if( m < n ) then + info = -1 + else if( lda < max( 1, m ) ) then + info = -4 + else if( ldt < max( 1, n ) ) then + info = -6 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGEQRT3', -info ) + return + end if + if( n==1 ) then + ! compute householder transform when n=1 + call stdlib_zlarfg( m, a(1,1), a( min( 2, m ), 1 ), 1, t(1,1) ) + else + ! otherwise, split a into blocks... + n1 = n/2 + n2 = n-n1 + j1 = min( n1+1, n ) + i1 = min( n+1, m ) + ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h + call stdlib_zgeqrt3( m, n1, a, lda, t, ldt, iinfo ) + ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)] + do j=1,n2 + do i=1,n1 + t( i, j+n1 ) = a( i, j+n1 ) + end do + end do + call stdlib_ztrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + call stdlib_zgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1 ), lda,a( j1, j1 ), lda, & + cone, t( 1, j1 ), ldt) + call stdlib_ztrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1, j1 ), ldt ) + + call stdlib_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1 ), lda,t( 1, j1 ), ldt, & + cone, a( j1, j1 ), lda ) + call stdlib_ztrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1, j1 ), ldt ) + + do j=1,n2 + do i=1,n1 + a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 ) + end do + end do + ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h + call stdlib_zgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo ) + ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2 + do i=1,n1 + do j=1,n2 + t( i, j+n1 ) = conjg(a( j+n1, i )) + end do + end do + call stdlib_ztrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1, j1 ), & + ldt ) + call stdlib_zgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1 ), lda,a( i1, j1 ), lda, & + cone, t( 1, j1 ), ldt ) + call stdlib_ztrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1, j1 ), ldt ) + + call stdlib_ztrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1, j1 ), & + ldt ) + ! y = (y1,y2); r = [ r1 a(1:n1,j1:n) ]; t = [t1 t3] + ! [ 0 r2 ] [ 0 t2] + end if + return + end subroutine stdlib_zgeqrt3 + + !> ZGERQ2: computes an RQ factorization of a complex m by n matrix A: + !> A = R * Q. + + pure subroutine stdlib_zgerq2( m, n, a, lda, tau, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, k + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZGERQF: computes an RQ factorization of a complex M-by-N matrix A: + !> A = R * Q. + + pure subroutine stdlib_zgerqf( m, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, & + nx + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. lwork1 .and. nb=nbmin .and. nb1 ) then + ! form the triangular factor of the block reflector + ! h = h(i+ib-1) . . . h(i+1) h(i) + call stdlib_zlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1 ), lda, & + tau( i ), work, ldwork ) + ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right + call stdlib_zlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-& + k+i+ib-1, ib,a( m-k+i, 1 ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork ) + + end if + end do + mu = m - k + i + nb - 1 + nu = n - k + i + nb - 1 + else + mu = m + nu = n + end if + ! use unblocked code to factor the last or only block + if( mu>0 .and. nu>0 )call stdlib_zgerq2( mu, nu, a, lda, tau, work, iinfo ) + work( 1 ) = iws + return + end subroutine stdlib_zgerqf + + !> ZGESC2: solves a system of linear equations + !> A * X = scale* RHS + !> with a general N-by-N matrix A using the LU factorization with + !> complete pivoting computed by ZGETC2. + + pure subroutine stdlib_zgesc2( n, a, lda, rhs, ipiv, jpiv, scale ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: scale + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: rhs(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: bignum, eps, smlnum + complex(dp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,cmplx + ! Executable Statements + ! set constant to control overflow + eps = stdlib_dlamch( 'P' ) + smlnum = stdlib_dlamch( 'S' ) / eps + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! apply permutations ipiv to rhs + call stdlib_zlaswp( 1, rhs, lda, 1, n-1, ipiv, 1 ) + ! solve for l part + do i = 1, n - 1 + do j = i + 1, n + rhs( j ) = rhs( j ) - a( j, i )*rhs( i ) + end do + end do + ! solve for u part + scale = one + ! check for scaling + i = stdlib_izamax( n, rhs, 1 ) + if( two*smlnum*abs( rhs( i ) )>abs( a( n, n ) ) ) then + temp = cmplx( one / two, zero,KIND=dp) / abs( rhs( i ) ) + call stdlib_zscal( n, temp, rhs( 1 ), 1 ) + scale = scale*real( temp,KIND=dp) + end if + do i = n, 1, -1 + temp = cmplx( one, zero,KIND=dp) / a( i, i ) + rhs( i ) = rhs( i )*temp + do j = i + 1, n + rhs( i ) = rhs( i ) - rhs( j )*( a( i, j )*temp ) + end do + end do + ! apply permutations jpiv to the solution (rhs) + call stdlib_zlaswp( 1, rhs, lda, 1, n-1, jpiv, -1 ) + return + end subroutine stdlib_zgesc2 + + !> ZGETRF2: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the recursive version of the algorithm. It divides + !> the matrix into four submatrices: + !> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 + !> A = [ -----|----- ] with n1 = min(m,n)/2 + !> [ A21 | A22 ] n2 = n-n1 + !> [ A11 ] + !> The subroutine calls itself to factor [ --- ], + !> [ A12 ] + !> [ A12 ] + !> do the swaps on [ --- ], solve A12, update A22, + !> [ A22 ] + !> then calls itself to factor A22 and do the swaps on A21. + + pure recursive subroutine stdlib_zgetrf2( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + real(dp) :: sfmin + complex(dp) :: temp + integer(ilp) :: i, iinfo, n1, n2 + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda= sfmin ) then + call stdlib_zscal( m-1, cone / a( 1, 1 ), a( 2, 1 ), 1 ) + else + do i = 1, m-1 + a( 1+i, 1 ) = a( 1+i, 1 ) / a( 1, 1 ) + end do + end if + else + info = 1 + end if + else + ! use recursive code + n1 = min( m, n ) / 2 + n2 = n-n1 + ! [ a11 ] + ! factor [ --- ] + ! [ a21 ] + call stdlib_zgetrf2( m, n1, a, lda, ipiv, iinfo ) + if ( info==0 .and. iinfo>0 )info = iinfo + ! [ a12 ] + ! apply interchanges to [ --- ] + ! [ a22 ] + call stdlib_zlaswp( n2, a( 1, n1+1 ), lda, 1, n1, ipiv, 1 ) + ! solve a12 + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1, n1+1 ), lda ) + + ! update a22 + call stdlib_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1 ), lda,a( 1, n1+1 ), & + lda, cone, a( n1+1, n1+1 ), lda ) + ! factor a22 + call stdlib_zgetrf2( m-n1, n2, a( n1+1, n1+1 ), lda, ipiv( n1+1 ),iinfo ) + ! adjust info and the pivot indices + if ( info==0 .and. iinfo>0 )info = iinfo + n1 + do i = n1+1, min( m, n ) + ipiv( i ) = ipiv( i ) + n1 + end do + ! apply interchanges to a21 + call stdlib_zlaswp( n1, a( 1, 1 ), lda, n1+1, min( m, n), ipiv, 1 ) + end if + return + end subroutine stdlib_zgetrf2 + + !> ZGETRI: computes the inverse of a matrix using the LU factorization + !> computed by ZGETRF. + !> This method inverts U and then computes inv(A) by solving the system + !> inv(A)*L = inv(U) for inv(A). + + pure subroutine stdlib_zgetri( n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iws, j, jb, jj, jp, ldwork, lwkopt, nb, nbmin, nn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'ZGETRI', ' ', n, -1, -1, -1 ) + lwkopt = n*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( lda 0 from stdlib_ztrtri, then u is singular, + ! and the inverse is not computed. + call stdlib_ztrtri( 'UPPER', 'NON-UNIT', n, a, lda, info ) + if( info>0 )return + nbmin = 2 + ldwork = n + if( nb>1 .and. nb=n ) then + ! use unblocked code. + do j = n, 1, -1 + ! copy current column of l to work and replace with zeros. + do i = j + 1, n + work( i ) = a( i, j ) + a( i, j ) = czero + end do + ! compute current column of inv(a). + if( j ZGETRS: solves a system of linear equations + !> A * X = B, A**T * X = B, or A**H * X = B + !> with a general N-by-N matrix A using the LU factorization computed + !> by ZGETRF. + + pure subroutine stdlib_zgetrs( trans, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notran + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZGGHRD: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then ZGGHRD reduces the original + !> problem to generalized Hessenberg form. + + pure subroutine stdlib_zgghrd( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: ilq, ilz + integer(ilp) :: icompq, icompz, jcol, jrow + real(dp) :: c + complex(dp) :: ctemp, s + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! decode compq + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + icompq = 0 + end if + ! decode compz + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + icompz = 0 + end if + ! test the input parameters. + info = 0 + if( icompq<=0 ) then + info = -1 + else if( icompz<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi ZGGQRF: computes a generalized QR factorization of an N-by-M matrix A + !> and an N-by-P matrix B: + !> A = Q*R, B = Q*T*Z, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, + !> and R and T assume one of the forms: + !> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, + !> ( 0 ) N-M N M-N + !> M + !> where R11 is upper triangular, and + !> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, + !> P-N N ( T21 ) P + !> P + !> where T12 or T21 is upper triangular. + !> In particular, if B is square and nonsingular, the GQR factorization + !> of A and B implicitly gives the QR factorization of inv(B)*A: + !> inv(B)*A = Z**H * (inv(T)*R) + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of matrix Z. + + pure subroutine stdlib_zggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', n, m, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGERQF', ' ', n, p, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMQR', ' ', n, m, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( p<0 ) then + info = -3 + else if( lda ZGGRQF: computes a generalized RQ factorization of an M-by-N matrix A + !> and a P-by-N matrix B: + !> A = R*Q, B = Z*T*Q, + !> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary + !> matrix, and R and T assume one of the forms: + !> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, + !> N-M M ( R21 ) N + !> N + !> where R12 or R21 is upper triangular, and + !> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, + !> ( 0 ) P-N P N-P + !> N + !> where T11 is upper triangular. + !> In particular, if B is square and nonsingular, the GRQ factorization + !> of A and B implicitly gives the RQ factorization of A*inv(B): + !> A*inv(B) = (R*inv(T))*Z**H + !> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the + !> conjugate transpose of the matrix Z. + + pure subroutine stdlib_zggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: taua(*), taub(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkopt, nb, nb1, nb2, nb3 + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGERQF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', p, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMRQ', ' ', m, n, p, -1 ) + nb = max( nb1, nb2, nb3 ) + lwkopt = max( n, m, p )*nb + work( 1 ) = lwkopt + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( p<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZGTTRS: solves one of the systems of equations + !> A * X = B, A**T * X = B, or A**H * X = B, + !> with a tridiagonal matrix A using the LU factorization computed + !> by ZGTTRF. + + pure subroutine stdlib_zgttrs( trans, n, nrhs, dl, d, du, du2, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: itrans, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + notran = ( trans=='N' .or. trans=='N' ) + if( .not.notran .and. .not.( trans=='T' .or. trans=='T' ) .and. .not.( trans=='C' .or. & + trans=='C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_zgtts2( itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_zgtts2( itrans, n, jb, dl, d, du, du2, ipiv, b( 1, j ),ldb ) + end do + end if + end subroutine stdlib_zgttrs + + !> ZHB2ST_KERNELS: is an internal routine used by the ZHETRD_HB2ST + !> subroutine. + + pure subroutine stdlib_zhb2st_kernels( uplo, wantz, ttype,st, ed, sweep, n, nb, ib,a, lda, & + v, tau, ldvt, work) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: wantz + integer(ilp), intent(in) :: ttype, st, ed, sweep, n, nb, ib, lda, ldvt + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: v(*), tau(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, j1, j2, lm, ln, vpos, taupos, dpos, ofdpos, ajeter + complex(dp) :: ctmp + ! Intrinsic Functions + intrinsic :: conjg,mod + ! Executable Statements + ajeter = ib + ldvt + upper = stdlib_lsame( uplo, 'U' ) + if( upper ) then + dpos = 2 * nb + 1 + ofdpos = 2 * nb + else + dpos = 1 + ofdpos = 2 + endif + ! upper case + if( upper ) then + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = conjg( a( ofdpos-i, st+i ) ) + a( ofdpos-i, st+i ) = czero + end do + ctmp = conjg( a( ofdpos, st ) ) + call stdlib_zlarfg( lm, ctmp, v( vpos+1 ), 1,tau( taupos ) ) + a( ofdpos, st ) = ctmp + lm = ed - st + 1 + call stdlib_zlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_zlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_zlarfx( 'LEFT', ln, lm, v( vpos ),conjg( tau( taupos ) ),a( & + dpos-nb, j1 ), lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) =conjg( a( dpos-nb-i, j1+i ) ) + a( dpos-nb-i, j1+i ) = czero + end do + ctmp = conjg( a( dpos-nb, j1 ) ) + call stdlib_zlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) ) + a( dpos-nb, j1 ) = ctmp + call stdlib_zlarfx( 'RIGHT', ln-1, lm, v( vpos ),tau( taupos ),a( dpos-nb+& + 1, j1 ), lda-1, work) + endif + endif + ! lower case + else + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + else + vpos = mod( sweep-1, 2 ) * n + st + taupos = mod( sweep-1, 2 ) * n + st + endif + if( ttype==1 ) then + lm = ed - st + 1 + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( ofdpos+i, st-1 ) + a( ofdpos+i, st-1 ) = czero + end do + call stdlib_zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,tau( taupos ) ) + + lm = ed - st + 1 + call stdlib_zlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==3 ) then + lm = ed - st + 1 + call stdlib_zlarfy( uplo, lm, v( vpos ), 1,conjg( tau( taupos ) ),a( dpos, st )& + , lda-1, work) + endif + if( ttype==2 ) then + j1 = ed+1 + j2 = min( ed+nb, n ) + ln = ed-st+1 + lm = j2-j1+1 + if( lm>0) then + call stdlib_zlarfx( 'RIGHT', lm, ln, v( vpos ),tau( taupos ), a( dpos+nb, & + st ),lda-1, work) + if( wantz ) then + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + else + vpos = mod( sweep-1, 2 ) * n + j1 + taupos = mod( sweep-1, 2 ) * n + j1 + endif + v( vpos ) = cone + do i = 1, lm-1 + v( vpos+i ) = a( dpos+nb+i, st ) + a( dpos+nb+i, st ) = czero + end do + call stdlib_zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,tau( taupos ) ) + + call stdlib_zlarfx( 'LEFT', lm, ln-1, v( vpos ),conjg( tau( taupos ) ),a( & + dpos+nb-1, st+1 ), lda-1, work) + endif + endif + endif + return + end subroutine stdlib_zhb2st_kernels + + !> ZHEEQUB: computes row and column scalings intended to equilibrate a + !> Hermitian matrix A (with respect to the Euclidean norm) and reduce + !> its condition number. The scale factors S are computed by the BIN + !> algorithm (see references) so that the scaled matrix B with elements + !> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of + !> the smallest possible condition number over all possible diagonal + !> scalings. + + pure subroutine stdlib_zheequb( uplo, n, a, lda, s, scond, amax, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: amax, scond + character, intent(in) :: uplo + ! Array Arguments + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(out) :: s(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: max_iter = 100 + + + ! Local Scalars + integer(ilp) :: i, j, iter + real(dp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, & + scale, sumsq + logical(lk) :: up + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,int,log,max,min,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + if ( .not. ( stdlib_lsame( uplo, 'U' ) .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -1 + else if ( n < 0 ) then + info = -2 + else if ( lda < max( 1, n ) ) then + info = -4 + end if + if ( info /= 0 ) then + call stdlib_xerbla( 'ZHEEQUB', -info ) + return + end if + up = stdlib_lsame( uplo, 'U' ) + amax = zero + ! quick return if possible. + if ( n == 0 ) then + scond = one + return + end if + do i = 1, n + s( i ) = zero + end do + amax = zero + if ( up ) then + do j = 1, n + do i = 1, j-1 + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + end do + else + do j = 1, n + s( j ) = max( s( j ), cabs1( a( j, j ) ) ) + amax = max( amax, cabs1( a( j, j ) ) ) + do i = j+1, n + s( i ) = max( s( i ), cabs1( a( i, j ) ) ) + s( j ) = max( s( j ), cabs1( a( i, j ) ) ) + amax = max( amax, cabs1( a( i, j ) ) ) + end do + end do + end if + do j = 1, n + s( j ) = 1.0_dp / s( j ) + end do + tol = one / sqrt( 2.0_dp * n ) + do iter = 1, max_iter + scale = zero + sumsq = zero + ! beta = |a|s + do i = 1, n + work( i ) = zero + end do + if ( up ) then + do j = 1, n + do i = 1, j-1 + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + end do + else + do j = 1, n + work( j ) = work( j ) + cabs1( a( j, j ) ) * s( j ) + do i = j+1, n + work( i ) = work( i ) + cabs1( a( i, j ) ) * s( j ) + work( j ) = work( j ) + cabs1( a( i, j ) ) * s( i ) + end do + end do + end if + ! avg = s^t beta / n + avg = zero + do i = 1, n + avg = avg + real( s( i )*work( i ),KIND=dp) + end do + avg = avg / n + std = zero + do i = n+1, 2*n + work( i ) = s( i-n ) * work( i-n ) - avg + end do + call stdlib_zlassq( n, work( n+1 ), 1, scale, sumsq ) + std = scale * sqrt( sumsq / n ) + if ( std < tol * avg ) goto 999 + do i = 1, n + t = cabs1( a( i, i ) ) + si = s( i ) + c2 = ( n-1 ) * t + c1 = ( n-2 ) * ( real( work( i ),KIND=dp) - t*si ) + c0 = -(t*si)*si + 2 * real( work( i ),KIND=dp) * si - n*avg + d = c1*c1 - 4*c0*c2 + if ( d <= 0 ) then + info = -1 + return + end if + si = -2*c0 / ( c1 + sqrt( d ) ) + d = si - s( i ) + u = zero + if ( up ) then + do j = 1, i + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + else + do j = 1, i + t = cabs1( a( i, j ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + do j = i+1,n + t = cabs1( a( j, i ) ) + u = u + s( j )*t + work( j ) = work( j ) + d*t + end do + end if + avg = avg + ( u + real( work( i ),KIND=dp) ) * d / n + s( i ) = si + end do + end do + 999 continue + smlnum = stdlib_dlamch( 'SAFEMIN' ) + bignum = one / smlnum + smin = bignum + smax = zero + t = one / sqrt( avg ) + base = stdlib_dlamch( 'B' ) + u = one / log( base ) + do i = 1, n + s( i ) = base ** int( u * log( s( i ) * t ),KIND=ilp) + smin = min( smin, s( i ) ) + smax = max( smax, s( i ) ) + end do + scond = max( smin, smlnum ) / min( smax, bignum ) + end subroutine stdlib_zheequb + + !> ZHEGS2: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. + !> B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. + + pure subroutine stdlib_zhegs2( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k + real(dp) :: akk, bkk + complex(dp) :: ct + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda ZHEGST: reduces a complex Hermitian-definite generalized + !> eigenproblem to standard form. + !> If ITYPE = 1, the problem is A*x = lambda*B*x, + !> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) + !> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or + !> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. + !> B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. + + pure subroutine stdlib_zhegst( itype, uplo, n, a, lda, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: k, kb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n ) then + ! use unblocked code + call stdlib_zhegs2( itype, uplo, n, a, lda, b, ldb, info ) + else + ! use blocked code + if( itype==1 ) then + if( upper ) then + ! compute inv(u**h)*a*inv(u) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(k:n,k:n) + call stdlib_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_ztrsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, & + n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda ) + call stdlib_zhemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_zher2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( & + k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_zhemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(& + k, k+kb ), ldb,cone, a( k, k+kb ), lda ) + call stdlib_ztrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+& + 1, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda ) + end if + end do + else + ! compute inv(l)*a*inv(l**h) + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(k:n,k:n) + call stdlib_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + if( k+kb<=n ) then + call stdlib_ztrsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-& + kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda ) + call stdlib_zhemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_zher2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, & + k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda ) + call stdlib_zhemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, & + b( k+kb, k ), ldb,cone, a( k+kb, k ), lda ) + call stdlib_ztrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, & + kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda ) + end if + end do + end if + else + if( upper ) then + ! compute u*a*u**h + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the upper triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_ztrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, & + b, ldb, a( 1, k ), lda ) + call stdlib_zhemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_zher2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1, k ), lda, b( & + 1, k ), ldb, one, a,lda ) + call stdlib_zhemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1, k ),& + ldb, cone, a( 1, k ),lda ) + call stdlib_ztrmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, & + kb, cone, b( k, k ), ldb,a( 1, k ), lda ) + call stdlib_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + else + ! compute l**h*a*l + do k = 1, n, nb + kb = min( n-k+1, nb ) + ! update the lower triangle of a(1:k+kb-1,1:k+kb-1) + call stdlib_ztrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,& + b, ldb, a( k, 1 ), lda ) + call stdlib_zhemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_zher2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1 ), & + lda, b( k, 1 ), ldb,one, a, lda ) + call stdlib_zhemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1 ), & + ldb, cone, a( k, 1 ),lda ) + call stdlib_ztrmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,& + cone, b( k, k ), ldb,a( k, 1 ), lda ) + call stdlib_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info ) + + end do + end if + end if + end if + return + end subroutine stdlib_zhegst + + !> ZHETD2: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_zhetd2( uplo, n, a, lda, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i + complex(dp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U') + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZHETRD: reduces a complex Hermitian matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_zhetrd( uplo, n, a, lda, d, e, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, iws, j, kk, ldwork, lwkopt, nb, nbmin, nx + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb ZHETRD_HB2ST: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_zhetrd_hb2st( stage1, vect, uplo, n, kd, ab, ldab,d, e, hous, lhous, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: stage1, uplo, vect + integer(ilp), intent(in) :: n, kd, ldab, lhous, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: hous(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: rzero = 0.0e+0_dp + + + ! Local Scalars + logical(lk) :: lquery, wantq, upper, afters1 + integer(ilp) :: i, m, k, ib, sweepid, myid, shift, stt, st, ed, stind, edind, & + blklastind, colpt, thed, stepercol, grsiz, thgrsiz, thgrnb, thgrid, nbtiles, ttype, & + tid, nthreads, debug, abdpos, abofdpos, dpos, ofdpos, awpos, inda, indw, apos, sizea, & + lda, indv, indtau, sizev, sizetau, ldv, lhmin, lwmin + real(dp) :: abstmp + complex(dp) :: tmp + ! Intrinsic Functions + intrinsic :: min,max,ceiling,real + ! Executable Statements + ! determine the minimal workspace size required. + ! test the input parameters + debug = 0 + info = 0 + afters1 = stdlib_lsame( stage1, 'Y' ) + wantq = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) .or. ( lhous==-1 ) + ! determine the block size, the workspace size and the hous size. + ib = stdlib_ilaenv2stage( 2, 'ZHETRD_HB2ST', vect, n, kd, -1, -1 ) + lhmin = stdlib_ilaenv2stage( 3, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 ) + lwmin = stdlib_ilaenv2stage( 4, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 ) + if( .not.afters1 .and. .not.stdlib_lsame( stage1, 'N' ) ) then + info = -1 + else if( .not.stdlib_lsame( vect, 'N' ) ) then + info = -2 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab<(kd+1) ) then + info = -7 + else if( lhoused ) exit + loop_120: do m = 1, stepercol + st = stt + loop_130: do sweepid = st, ed + loop_140: do k = 1, grsiz + myid = (i-sweepid)*(stepercol*grsiz)+ (m-1)*grsiz + k + if ( myid==1 ) then + ttype = 1 + else + ttype = mod( myid, 2 ) + 2 + endif + if( ttype==2 ) then + colpt = (myid/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + blklastind = colpt + else + colpt = ((myid+1)/2)*kd + sweepid + stind = colpt-kd+1 + edind = min(colpt,n) + if( ( stind>=edind-1 ).and.( edind==n ) ) then + blklastind = n + else + blklastind = 0 + endif + endif + ! call the kernel + !$ if( ttype/=1 ) then + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(in:WORK(MYID-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + !$ call stdlib_zhb2st_kernels( uplo, wantq, ttype,stind, edind, & + !$ sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + !$ indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ else + !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) & + !$OMP& DEPEND(out:WORK(MYID)) + !$ tid = omp_get_thread_num() + call stdlib_zhb2st_kernels( uplo, wantq, ttype,stind, edind, & + sweepid, n, kd, ib,work ( inda ), lda,hous( indv ), hous( & + indtau ), ldv,work( indw + tid*kd ) ) + !$OMP END TASK + !$ endif + if ( blklastind>=(n-1) ) then + stt = stt + 1 + exit + endif + end do loop_140 + end do loop_130 + end do loop_120 + end do loop_110 + end do loop_100 + !$OMP END MASTER + !$OMP END PARALLEL + ! copy the diagonal from a to d. note that d is real thus only + ! the real part is needed, the imaginary part should be czero. + do i = 1, n + d( i ) = real( work( dpos+(i-1)*lda ),KIND=dp) + end do + ! copy the off diagonal from a to e. note that e is real thus only + ! the real part is needed, the imaginary part should be czero. + if( upper ) then + do i = 1, n-1 + e( i ) = real( work( ofdpos+i*lda ),KIND=dp) + end do + else + do i = 1, n-1 + e( i ) = real( work( ofdpos+(i-1)*lda ),KIND=dp) + end do + endif + hous( 1 ) = lhmin + work( 1 ) = lwmin + return + end subroutine stdlib_zhetrd_hb2st + + !> ZHETRD_HE2HB: reduces a complex Hermitian matrix A to complex Hermitian + !> band-diagonal form AB by a unitary similarity transformation: + !> Q**H * A * Q = AB. + + pure subroutine stdlib_zhetrd_he2hb( uplo, n, kd, a, lda, ab, ldab, tau,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldab, lwork, n, kd + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: ab(ldab,*), tau(*), work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: rone = 1.0e+0_dp + + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, j, iinfo, lwmin, pn, pk, lk, ldt, ldw, lds2, lds1, ls2, ls1, lw, lt,& + tpos, wpos, s2pos, s1pos + ! Intrinsic Functions + intrinsic :: min,max + ! Executable Statements + ! determine the minimal workspace size required + ! and test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + lwmin = stdlib_ilaenv2stage( 4, 'ZHETRD_HE2HB', '', n, kd, -1, -1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( lda ZHETRF: computes the factorization of a complex Hermitian matrix A + !> using the Bunch-Kaufman diagonal pivoting method. The form of the + !> factorization is + !> A = U*D*U**H or A = L*D*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zhetrf( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlahef( uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo ) + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zhetf2( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**h using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_zlahef; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlahef( uplo, n-k+1, nb, kb, a( k, k ), lda, ipiv( k ),work, n, & + iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zhetf2( uplo, n-k+1, a( k, k ), lda, ipiv( k ), iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zhetrf + + !> ZHETRF_RK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: + !> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + !> For more information see Further Details section. + + pure subroutine stdlib_zhetrf_rk( uplo, n, a, lda, e, ipiv, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: abs,max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlahef_rk( uplo, k, nb, kb, a, lda, e,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zhetf2_rk( uplo, k, a, lda, e, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k-kb+1:k and apply row permutations to the + ! last k+1 colunms k+1:n after that block + ! (we can do the simple loop over ipiv with decrement -1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k n, exit from loop + if( k>n )go to 35 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlahef_rk( uplo, n-k+1, nb, kb, a( k, k ), lda, e( k ),ipiv( k ), & + work, ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zhetf2_rk( uplo, n-k+1, a( k, k ), lda, e( k ),ipiv( k ), iinfo ) + + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do i = k, k + kb - 1 + if( ipiv( i )>0 ) then + ipiv( i ) = ipiv( i ) + k - 1 + else + ipiv( i ) = ipiv( i ) - k + 1 + end if + end do + ! apply permutations to the leading panel 1:k-1 + ! read ipiv from the last block factored, i.e. + ! indices k:k+kb-1 and apply row permutations to the + ! first k-1 colunms 1:k-1 before that block + ! (we can do the simple loop over ipiv with increment 1, + ! since the abs value of ipiv( i ) represents the row index + ! of the interchange with row i in both 1x1 and 2x2 pivot cases) + if( k>1 ) then + do i = k, ( k + kb - 1 ), 1 + ip = abs( ipiv( i ) ) + if( ip/=i ) then + call stdlib_zswap( k-1, a( i, 1 ), lda,a( ip, 1 ), lda ) + end if + end do + end if + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + ! this label is the exit from main loop over k increasing + ! from 1 to n in steps of kb + 35 continue + ! end lower + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zhetrf_rk + + !> ZHETRF_ROOK: computes the factorization of a complex Hermitian matrix A + !> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. + !> The form of the factorization is + !> A = U*D*U**T or A = L*D*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zhetrf_rook( uplo, n, a, lda, ipiv, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nbnb ) then + ! factorize columns k-kb+1:k of a and use blocked code to + ! update columns 1:k-kb + call stdlib_zlahef_rook( uplo, k, nb, kb, a, lda,ipiv, work, ldwork, iinfo ) + + else + ! use unblocked code to factorize columns 1:k of a + call stdlib_zhetf2_rook( uplo, k, a, lda, ipiv, iinfo ) + kb = k + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + ! no need to adjust ipiv + ! decrease k and return to the start of the main loop + k = k - kb + go to 10 + else + ! factorize a as l*d*l**t using the lower triangle of a + ! k is the main loop index, increasing from 1 to n in steps of + ! kb, where kb is the number of columns factorized by stdlib_zlahef_rook; + ! kb is either nb or nb-1, or n-k+1 for the last block + k = 1 + 20 continue + ! if k > n, exit from loop + if( k>n )go to 40 + if( k<=n-nb ) then + ! factorize columns k:k+kb-1 of a and use blocked code to + ! update columns k+kb:n + call stdlib_zlahef_rook( uplo, n-k+1, nb, kb, a( k, k ), lda,ipiv( k ), work, & + ldwork, iinfo ) + else + ! use unblocked code to factorize columns k:n of a + call stdlib_zhetf2_rook( uplo, n-k+1, a( k, k ), lda, ipiv( k ),iinfo ) + kb = n - k + 1 + end if + ! set info on the first occurrence of a zero pivot + if( info==0 .and. iinfo>0 )info = iinfo + k - 1 + ! adjust ipiv + do j = k, k + kb - 1 + if( ipiv( j )>0 ) then + ipiv( j ) = ipiv( j ) + k - 1 + else + ipiv( j ) = ipiv( j ) - k + 1 + end if + end do + ! increase k and return to the start of the main loop + k = k + kb + go to 20 + end if + 40 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zhetrf_rook + + !> ZHETRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF. + + pure subroutine stdlib_zhetrs( uplo, n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(dp) :: s + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=dp) / real( a( k, k ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZHETRS2: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF and converted by ZSYCONV. + + pure subroutine stdlib_zhetrs2( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, j, k, kp + real(dp) :: s + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp==-ipiv( k-1 ) )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb & + ) + k=k-2 + end if + end do + ! compute (u \p**t * b) -> b [ (u \p**t * b) ] + call stdlib_ztrsm('L','U','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (u \p**t * b) ] + i=n + do while ( i >= 1 ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=dp) / real( a( i, i ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( i, 1 ), ldb ) + elseif ( i > 1) then + if ( ipiv(i-1) == ipiv(i) ) then + akm1k = work(i) + akm1 = a( i-1, i-1 ) / akm1k + ak = a( i, i ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i-1, j ) / akm1k + bk = b( i, j ) / conjg( akm1k ) + b( i-1, j ) = ( ak*bkm1-bk ) / denom + b( i, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i - 1 + endif + endif + i = i - 1 + end do + ! compute (u**h \ b) -> b [ u**h \ (d \ (u \p**t * b) ) ] + call stdlib_ztrsm('L','U','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (u**h \ (d \ (u \p**t * b) )) ] + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k < n .and. kp==-ipiv( k+1 ) )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp,& + 1 ), ldb ) + k=k+2 + endif + end do + else + ! solve a*x = b, where a = l*d*l**h. + ! p**t * b + k=1 + do while ( k <= n ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k+1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k+1). + kp = -ipiv( k+1 ) + if( kp==-ipiv( k ) )call stdlib_zswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + + k=k+2 + endif + end do + ! compute (l \p**t * b) -> b [ (l \p**t * b) ] + call stdlib_ztrsm('L','L','N','U',n,nrhs,cone,a,lda,b,ldb) + ! compute d \ b -> b [ d \ (l \p**t * b) ] + i=1 + do while ( i <= n ) + if( ipiv(i) > 0 ) then + s = real( cone,KIND=dp) / real( a( i, i ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( i, 1 ), ldb ) + else + akm1k = work(i) + akm1 = a( i, i ) / conjg( akm1k ) + ak = a( i+1, i+1 ) / akm1k + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( i, j ) / conjg( akm1k ) + bk = b( i+1, j ) / akm1k + b( i, j ) = ( ak*bkm1-bk ) / denom + b( i+1, j ) = ( akm1*bk-bkm1 ) / denom + end do + i = i + 1 + endif + i = i + 1 + end do + ! compute (l**h \ b) -> b [ l**h \ (d \ (l \p**t * b) ) ] + call stdlib_ztrsm('L','L','C','U',n,nrhs,cone,a,lda,b,ldb) + ! p * b [ p * (l**h \ (d \ (l \p**t * b) )) ] + k=n + do while ( k >= 1 ) + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k=k-1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( k>1 .and. kp==-ipiv( k-1 ) )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, & + 1 ), ldb ) + k=k-2 + endif + end do + end if + ! revert a + call stdlib_zsyconv( uplo, 'R', n, a, lda, ipiv, work, iinfo ) + return + end subroutine stdlib_zhetrs2 + + !> ZHETRS_AA: solves a system of linear equations A*X = B with a complex + !> hermitian matrix A using the factorization A = U**H*T*U or + !> A = L*T*L**H computed by ZHETRF_AA. + + pure subroutine stdlib_zhetrs_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, nrhs, lda, ldb, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + logical(lk) :: lquery, upper + integer(ilp) :: k, kp, lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute u**h \ b -> b [ (u**h \p**t * b) ] + call stdlib_ztrsm( 'L', 'U', 'C', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b( 2, 1 ),& + ldb ) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (u**h \p**t * b) ] + call stdlib_zlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1 ) + if( n>1 ) then + call stdlib_zlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 2*n ), 1) + call stdlib_zlacpy( 'F', 1, n-1, a( 1, 2 ), lda+1, work( 1 ), 1 ) + call stdlib_zlacgv( n-1, work( 1 ), 1 ) + end if + call stdlib_zgtsv( n, nrhs, work(1), work(n), work(2*n), b, ldb,info ) + ! 3) backward substitution with u + if( n>1 ) then + ! compute u \ b -> b [ u \ (t \ (u**h \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'U', 'N', 'U', n-1, nrhs, cone, a( 1, 2 ),lda, b(2, 1), & + ldb) + ! pivot, p * b [ p * (u**h \ (t \ (u \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + else + ! solve a*x = b, where a = l*t*l**h. + ! 1) forward substitution with l + if( n>1 ) then + ! pivot, p**t * b -> b + do k = 1, n + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + ! compute l \ b -> b [ (l \p**t * b) ] + call stdlib_ztrsm( 'L', 'L', 'N', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b(2, 1), & + ldb) + end if + ! 2) solve with triangular matrix t + ! compute t \ b -> b [ t \ (l \p**t * b) ] + call stdlib_zlacpy( 'F', 1, n, a(1, 1), lda+1, work(n), 1) + if( n>1 ) then + call stdlib_zlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 1 ), 1) + call stdlib_zlacpy( 'F', 1, n-1, a( 2, 1 ), lda+1, work( 2*n ), 1) + call stdlib_zlacgv( n-1, work( 2*n ), 1 ) + end if + call stdlib_zgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb,info) + ! 3) backward substitution with l**h + if( n>1 ) then + ! compute l**h \ b -> b [ l**h \ (t \ (l \p**t * b) ) ] + call stdlib_ztrsm( 'L', 'L', 'C', 'U', n-1, nrhs, cone, a( 2, 1 ),lda, b( 2, 1 ),& + ldb) + ! pivot, p * b [ p * (l**h \ (t \ (l \p**t * b) )) ] + do k = n, 1, -1 + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + end do + end if + end if + return + end subroutine stdlib_zhetrs_aa + + !> ZHETRS_ROOK: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF_ROOK. + + pure subroutine stdlib_zhetrs_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kp + real(dp) :: s + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: conjg,max,real + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=dp) / real( a( k, k ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k and -ipiv(k), then k-1 and -ipiv(k-1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k-1) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb & + ) + call stdlib_zgeru( k-2, nrhs, -cone, a( 1, k-1 ), 1, b( k-1, 1 ),ldb, b( 1, 1 ), & + ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = a( k-1, k ) + akm1 = a( k-1, k-1 ) / akm1k + ak = a( k, k ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, a( 1, k+1 )& + , 1, cone, b( k+1, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k), then k+1 and -ipiv(k+1) + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kp = -ipiv( k+1 ) + if( kp/=k+1 )call stdlib_zswap( nrhs, b( k+1, 1 ), ldb, b( kp, 1 ), ldb ) + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZHPTRD: reduces a complex Hermitian matrix A stored in packed form to + !> real symmetric tridiagonal form T by a unitary similarity + !> transformation: Q**H * A * Q = T. + + pure subroutine stdlib_zhptrd( uplo, n, ap, d, e, tau, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: tau(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, i1, i1i1, ii + complex(dp) :: alpha, taui + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZHPTRD', -info ) + return + end if + ! quick return if possible + if( n<=0 )return + if( upper ) then + ! reduce the upper triangle of a. + ! i1 is the index in ap of a(1,i+1). + i1 = n*( n-1 ) / 2 + 1 + ap( i1+n-1 ) = real( ap( i1+n-1 ),KIND=dp) + do i = n - 1, 1, -1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(1:i-1,i+1) + alpha = ap( i1+i-1 ) + call stdlib_zlarfg( i, alpha, ap( i1 ), 1, taui ) + e( i ) = real( alpha,KIND=dp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(1:i,1:i) + ap( i1+i-1 ) = cone + ! compute y := tau * a * v storing y in tau(1:i) + call stdlib_zhpmv( uplo, i, taui, ap, ap( i1 ), 1, czero, tau,1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_zdotc( i, tau, 1, ap( i1 ), 1 ) + call stdlib_zaxpy( i, alpha, ap( i1 ), 1, tau, 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_zhpr2( uplo, i, -cone, ap( i1 ), 1, tau, 1, ap ) + end if + ap( i1+i-1 ) = e( i ) + d( i+1 ) = real( ap( i1+i ),KIND=dp) + tau( i ) = taui + i1 = i1 - i + end do + d( 1 ) = real( ap( 1 ),KIND=dp) + else + ! reduce the lower triangle of a. ii is the index in ap of + ! a(i,i) and i1i1 is the index of a(i+1,i+1). + ii = 1 + ap( 1 ) = real( ap( 1 ),KIND=dp) + do i = 1, n - 1 + i1i1 = ii + n - i + 1 + ! generate elementary reflector h(i) = i - tau * v * v**h + ! to annihilate a(i+2:n,i) + alpha = ap( ii+1 ) + call stdlib_zlarfg( n-i, alpha, ap( ii+2 ), 1, taui ) + e( i ) = real( alpha,KIND=dp) + if( taui/=czero ) then + ! apply h(i) from both sides to a(i+1:n,i+1:n) + ap( ii+1 ) = cone + ! compute y := tau * a * v storing y in tau(i:n-1) + call stdlib_zhpmv( uplo, n-i, taui, ap( i1i1 ), ap( ii+1 ), 1,czero, tau( i ),& + 1 ) + ! compute w := y - 1/2 * tau * (y**h *v) * v + alpha = -chalf*taui*stdlib_zdotc( n-i, tau( i ), 1, ap( ii+1 ),1 ) + call stdlib_zaxpy( n-i, alpha, ap( ii+1 ), 1, tau( i ), 1 ) + ! apply the transformation as a rank-2 update: + ! a := a - v * w**h - w * v**h + call stdlib_zhpr2( uplo, n-i, -cone, ap( ii+1 ), 1, tau( i ), 1,ap( i1i1 ) ) + + end if + ap( ii+1 ) = e( i ) + d( i ) = real( ap( ii ),KIND=dp) + tau( i ) = taui + ii = i1i1 + end do + d( n ) = real( ap( ii ),KIND=dp) + end if + return + end subroutine stdlib_zhptrd + + !> ZHPTRS: solves a system of linear equations A*X = B with a complex + !> Hermitian matrix A stored in packed format using the factorization + !> A = U*D*U**H or A = L*D*L**H computed by ZHPTRF. + + pure subroutine stdlib_zhptrs( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, k, kc, kp + real(dp) :: s + complex(dp) :: ak, akm1, akm1k, bk, bkm1, denom + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in column k of a. + call stdlib_zgeru( k-1, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + ! multiply by the inverse of the diagonal block. + s = real( cone,KIND=dp) / real( ap( kc+k-1 ),KIND=dp) + call stdlib_zdscal( nrhs, s, b( k, 1 ), ldb ) + k = k - 1 + else + ! 2 x 2 diagonal block + ! interchange rows k-1 and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k-1 )call stdlib_zswap( nrhs, b( k-1, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(u(k)), where u(k) is the transformation + ! stored in columns k-1 and k of a. + call stdlib_zgeru( k-2, nrhs, -cone, ap( kc ), 1, b( k, 1 ), ldb,b( 1, 1 ), ldb ) + + call stdlib_zgeru( k-2, nrhs, -cone, ap( kc-( k-1 ) ), 1,b( k-1, 1 ), ldb, b( 1, & + 1 ), ldb ) + ! multiply by the inverse of the diagonal block. + akm1k = ap( kc+k-2 ) + akm1 = ap( kc-1 ) / akm1k + ak = ap( kc+k-1 ) / conjg( akm1k ) + denom = akm1*ak - cone + do j = 1, nrhs + bkm1 = b( k-1, j ) / akm1k + bk = b( k, j ) / conjg( akm1k ) + b( k-1, j ) = ( ak*bkm1-bk ) / denom + b( k, j ) = ( akm1*bk-bkm1 ) / denom + end do + kc = kc - k + 1 + k = k - 2 + end if + go to 10 + 30 continue + ! next solve u**h *x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 40 continue + ! if k > n, exit from loop. + if( k>n )go to 50 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(u**h(k)), where u(k) is the transformation + ! stored in column k of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + end if + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + k + k = k + 1 + else + ! 2 x 2 diagonal block + ! multiply by inv(u**h(k+1)), where u(k+1) is the transformation + ! stored in columns k and k+1 of a. + if( k>1 ) then + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc ), & + 1, cone, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', k-1, nrhs, -cone, b,ldb, ap( kc+k ),& + 1, cone, b( k+1, 1 ), ldb ) + call stdlib_zlacgv( nrhs, b( k+1, 1 ), ldb ) + end if + ! interchange rows k and -ipiv(k). + kp = -ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + kc = kc + 2*k + 1 + k = k + 2 + end if + go to 40 + 50 continue + else + ! solve a*x = b, where a = l*d*l**h. + ! first solve l*d*x = b, overwriting b with x. + ! k is the main loop index, increasing from 1 to n in steps of + ! 1 or 2, depending on the size of the diagonal blocks. + k = 1 + kc = 1 + 60 continue + ! if k > n, exit from loop. + if( k>n )go to 80 + if( ipiv( k )>0 ) then + ! 1 x 1 diagonal block + ! interchange rows k and ipiv(k). + kp = ipiv( k ) + if( kp/=k )call stdlib_zswap( nrhs, b( k, 1 ), ldb, b( kp, 1 ), ldb ) + ! multiply by inv(l(k)), where l(k) is the transformation + ! stored in column k of a. + if( k0 ) then + ! 1 x 1 diagonal block + ! multiply by inv(l**h(k)), where l(k) is the transformation + ! stored in column k of a. + if( k ZLA_GBRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(dp) function stdlib_zla_gbrcond_c( trans, n, kl, ku, ab,ldab, afb, ldafb, ipiv,c, & + capply, info, work,rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, kl, ku, ldab, ldafb + integer(ilp) :: kd, ke + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ab(ldab,*), afb(ldafb,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, anorm, tmp + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_zla_gbrcond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 .or. kl>n-1 ) then + info = -3 + else if( ku<0 .or. ku>n-1 ) then + info = -4 + else if( ldab ZLA_GERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(dp) function stdlib_zla_gercond_c( trans, n, a, lda, af,ldaf, ipiv, c, capply,info, & + work, rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: notrans + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, anorm, tmp + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_zla_gercond_c = zero + info = 0 + notrans = stdlib_lsame( trans, 'N' ) + if ( .not. notrans .and. .not. stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_HERCOND_C: computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(dp) function stdlib_zla_hercond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, work,& + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase, i, j + real(dp) :: ainvnm, anorm, tmp + logical(lk) :: up, upper + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_zla_hercond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_HERPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(dp) function stdlib_zla_herpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + real(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(dp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper, lsame + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if (upper) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i,j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i,j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_zsytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ) , work( k+1 ) ) + end do + work(k) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_zla_herpvgrw = rpvgrw + end function stdlib_zla_herpvgrw + + !> ZLA_PORCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector + + real(dp) function stdlib_zla_porcond_c( uplo, n, a, lda, af,ldaf, c, capply, info,work, & + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(dp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max,real,aimag + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_zla_porcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_SYRCOND_C: Computes the infinity norm condition number of + !> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. + + real(dp) function stdlib_zla_syrcond_c( uplo, n, a, lda, af,ldaf, ipiv, c, capply,info, work,& + rwork ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + logical(lk), intent(in) :: capply + integer(ilp), intent(in) :: n, lda, ldaf + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(in) :: c(*) + real(dp), intent(out) :: rwork(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: kase + real(dp) :: ainvnm, anorm, tmp + integer(ilp) :: i, j + logical(lk) :: up, upper + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + stdlib_zla_syrcond_c = zero + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda ZLA_SYRPVGRW: computes the reciprocal pivot growth factor + !> norm(A)/norm(U). The "max absolute element" norm is used. If this is + !> much less than 1, the stability of the LU factorization of the + !> (equilibrated) matrix A could be poor. This also means that the + !> solution X, estimated condition numbers, and error bounds could be + !> unreliable. + + real(dp) function stdlib_zla_syrpvgrw( uplo, n, info, a, lda, af,ldaf, ipiv, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, info, lda, ldaf + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), af(ldaf,*) + real(dp), intent(out) :: work(*) + integer(ilp), intent(in) :: ipiv(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: ncols, i, j, k, kp + real(dp) :: amax, umax, rpvgrw, tmp + logical(lk) :: upper + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag ( zdum ) ) + ! Executable Statements + upper = stdlib_lsame( 'UPPER', uplo ) + if ( info==0 ) then + if ( upper ) then + ncols = 1 + else + ncols = n + end if + else + ncols = info + end if + rpvgrw = one + do i = 1, 2*n + work( i ) = zero + end do + ! find the max magnitude entry of each column of a. compute the max + ! for all n columns so we can apply the pivot permutation while + ! looping below. assume a full factorization is the common case. + if ( upper ) then + do j = 1, n + do i = 1, j + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + else + do j = 1, n + do i = j, n + work( n+i ) = max( cabs1( a( i, j ) ), work( n+i ) ) + work( n+j ) = max( cabs1( a( i, j ) ), work( n+j ) ) + end do + end do + end if + ! now find the max magnitude entry of each column of u or l. also + ! permute the magnitudes of a above so they're in the same order as + ! the factor. + ! the iteration orders and permutations were copied from stdlib_zsytrs. + ! calls to stdlib_sswap would be severe overkill. + if ( upper ) then + k = n + do while ( k < ncols .and. k>0 ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = 1, k + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k - 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k-1 ) + work( n+k-1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = 1, k-1 + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k-1 ) =max( cabs1( af( i, k-1 ) ), work( k-1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k - 2 + end if + end do + k = ncols + do while ( k <= n ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k + 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k + 2 + end if + end do + else + k = 1 + do while ( k <= ncols ) + if ( ipiv( k )>0 ) then + ! 1x1 pivot + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + do i = k, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + end do + k = k + 1 + else + ! 2x2 pivot + kp = -ipiv( k ) + tmp = work( n+k+1 ) + work( n+k+1 ) = work( n+kp ) + work( n+kp ) = tmp + do i = k+1, n + work( k ) = max( cabs1( af( i, k ) ), work( k ) ) + work( k+1 ) =max( cabs1( af( i, k+1 ) ), work( k+1 ) ) + end do + work( k ) = max( cabs1( af( k, k ) ), work( k ) ) + k = k + 2 + end if + end do + k = ncols + do while ( k >= 1 ) + if ( ipiv( k )>0 ) then + kp = ipiv( k ) + if ( kp /= k ) then + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + end if + k = k - 1 + else + kp = -ipiv( k ) + tmp = work( n+k ) + work( n+k ) = work( n+kp ) + work( n+kp ) = tmp + k = k - 2 + endif + end do + end if + ! compute the *inverse* of the max element growth factor. dividing + ! by zero would imply the largest entry of the factor's column is + ! zero. than can happen when either the column of a is zero or + ! massive pivots made the factor underflow to zero. neither counts + ! as growth in itself, so simply ignore terms with zero + ! denominators. + if ( upper ) then + do i = ncols, n + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + else + do i = 1, ncols + umax = work( i ) + amax = work( n+i ) + if ( umax /= zero ) then + rpvgrw = min( amax / umax, rpvgrw ) + end if + end do + end if + stdlib_zla_syrpvgrw = rpvgrw + end function stdlib_zla_syrpvgrw + + !> ZLABRD: reduces the first NB rows and columns of a complex general + !> m by n matrix A to upper or lower real bidiagonal form by a unitary + !> transformation Q**H * A * P, and returns the matrices X and Y which + !> are needed to apply the transformation to the unreduced part of A. + !> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + !> bidiagonal form. + !> This is an auxiliary routine called by ZGEBRD + + pure subroutine stdlib_zlabrd( m, n, nb, a, lda, d, e, tauq, taup, x, ldx, y,ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, ldx, ldy, m, n, nb + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: taup(*), tauq(*), x(ldx,*), y(ldy,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( m<=0 .or. n<=0 )return + if( m>=n ) then + ! reduce to upper bidiagonal form + loop_10: do i = 1, nb + ! update a(i:m,i) + call stdlib_zlacgv( i-1, y( i, 1 ), ldy ) + call stdlib_zgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, a( i, 1 ),lda, y( i, 1 ), & + ldy, cone, a( i, i ), 1 ) + call stdlib_zlacgv( i-1, y( i, 1 ), ldy ) + call stdlib_zgemv( 'NO TRANSPOSE', m-i+1, i-1, -cone, x( i, 1 ),ldx, a( 1, i ), & + 1, cone, a( i, i ), 1 ) + ! generate reflection q(i) to annihilate a(i+1:m,i) + alpha = a( i, i ) + call stdlib_zlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1,tauq( i ) ) + d( i ) = real( alpha,KIND=dp) + if( i ZLAED7: computes the updated eigensystem of a diagonal + !> matrix after modification by a rank-one symmetric matrix. This + !> routine is used only for the eigenproblem which requires all + !> eigenvalues and optionally eigenvectors of a dense or banded + !> Hermitian matrix that has been reduced to tridiagonal form. + !> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) + !> where Z = Q**Hu, u is a vector of length N with ones in the + !> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + !> The eigenvectors of the original matrix are stored in Q, and the + !> eigenvalues are in D. The algorithm consists of three stages: + !> The first stage consists of deflating the size of the problem + !> when there are multiple eigenvalues or if there is a zero in + !> the Z vector. For each such occurrence the dimension of the + !> secular equation problem is reduced by one. This stage is + !> performed by the routine DLAED2. + !> The second stage consists of calculating the updated + !> eigenvalues. This is done by finding the roots of the secular + !> equation via the routine DLAED4 (as called by SLAED3). + !> This routine also calculates the eigenvectors of the current + !> problem. + !> The final stage consists of computing the updated eigenvectors + !> directly using the updated eigenvalues. The eigenvectors for + !> the current problem are multiplied with the eigenvectors from + !> the overall problem. + + pure subroutine stdlib_zlaed7( n, cutpnt, qsiz, tlvls, curlvl, curpbm, d, q,ldq, rho, indxq, & + qstore, qptr, prmptr, perm,givptr, givcol, givnum, work, rwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: curlvl, curpbm, cutpnt, ldq, n, qsiz, tlvls + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: rho + ! Array Arguments + integer(ilp), intent(inout) :: givcol(2,*), givptr(*), perm(*), prmptr(*), qptr(*) + + integer(ilp), intent(out) :: indxq(*), iwork(*) + real(dp), intent(inout) :: d(*), givnum(2,*), qstore(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: coltyp, curr, i, idlmda, indx, indxc, indxp, iq, iw, iz, k, n1, n2, & + ptr + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq<0 .or. icompq>1 ) then + ! info = -1 + ! else if( n<0 ) then + if( n<0 ) then + info = -1 + else if( min( 1, n )>cutpnt .or. n ZLAEIN: uses inverse iteration to find a right or left eigenvector + !> corresponding to the eigenvalue W of a complex upper Hessenberg + !> matrix H. + + pure subroutine stdlib_zlaein( rightv, noinit, n, h, ldh, w, v, b, ldb, rwork,eps3, smlnum, & + info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: noinit, rightv + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldh, n + real(dp), intent(in) :: eps3, smlnum + complex(dp), intent(in) :: w + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: b(ldb,*) + complex(dp), intent(in) :: h(ldh,*) + complex(dp), intent(inout) :: v(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: tenth = 1.0e-1_dp + + + ! Local Scalars + character :: normin, trans + integer(ilp) :: i, ierr, its, j + real(dp) :: growto, nrmsml, rootn, rtemp, scale, vnorm + complex(dp) :: cdum, ei, ej, temp, x + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! growto is the threshold used in the acceptance test for an + ! eigenvector. + rootn = sqrt( real( n,KIND=dp) ) + growto = tenth / rootn + nrmsml = max( one, eps3*rootn )*smlnum + ! form b = h - w*i (except that the subdiagonal elements are not + ! stored). + do j = 1, n + do i = 1, j - 1 + b( i, j ) = h( i, j ) + end do + b( j, j ) = h( j, j ) - w + end do + if( noinit ) then + ! initialize v. + do i = 1, n + v( i ) = eps3 + end do + else + ! scale supplied initial vector. + vnorm = stdlib_dznrm2( n, v, 1 ) + call stdlib_zdscal( n, ( eps3*rootn ) / max( vnorm, nrmsml ), v, 1 ) + end if + if( rightv ) then + ! lu decomposition with partial pivoting of b, replacing czero + ! pivots by eps3. + do i = 1, n - 1 + ei = h( i+1, i ) + if( cabs1( b( i, i ) )=growto*scale )go to 120 + ! choose new orthogonal starting vector and try again. + rtemp = eps3 / ( rootn+one ) + v( 1 ) = eps3 + do i = 2, n + v( i ) = rtemp + end do + v( n-its+1 ) = v( n-its+1 ) - eps3*rootn + end do + ! failure to find eigenvector in n iterations. + info = 1 + 120 continue + ! normalize eigenvector. + i = stdlib_izamax( n, v, 1 ) + call stdlib_zdscal( n, one / cabs1( v( i ) ), v, 1 ) + return + end subroutine stdlib_zlaein + + !> ZLAGS2: computes 2-by-2 unitary matrices U, V and Q, such + !> that if ( UPPER ) then + !> U**H *A*Q = U**H *( A1 A2 )*Q = ( x 0 ) + !> ( 0 A3 ) ( x x ) + !> and + !> V**H*B*Q = V**H *( B1 B2 )*Q = ( x 0 ) + !> ( 0 B3 ) ( x x ) + !> or if ( .NOT.UPPER ) then + !> U**H *A*Q = U**H *( A1 0 )*Q = ( x x ) + !> ( A2 A3 ) ( 0 x ) + !> and + !> V**H *B*Q = V**H *( B1 0 )*Q = ( x x ) + !> ( B2 B3 ) ( 0 x ) + !> where + !> U = ( CSU SNU ), V = ( CSV SNV ), + !> ( -SNU**H CSU ) ( -SNV**H CSV ) + !> Q = ( CSQ SNQ ) + !> ( -SNQ**H CSQ ) + !> The rows of the transformed A and B are parallel. Moreover, if the + !> input 2-by-2 matrix A is not zero, then the transformed (1,1) entry + !> of A is not zero. If the input matrices A and B are both not zero, + !> then the transformed (2,2) element of B is not zero, except when the + !> first rows of input A and B are parallel and the second rows are + !> zero. + + pure subroutine stdlib_zlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu, csv,snv, csq, snq ) + + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: upper + real(dp), intent(in) :: a1, a3, b1, b3 + real(dp), intent(out) :: csq, csu, csv + complex(dp), intent(in) :: a2, b2 + complex(dp), intent(out) :: snq, snu, snv + ! ===================================================================== + + ! Local Scalars + real(dp) :: a, aua11, aua12, aua21, aua22, avb12, avb11, avb21, avb22, csl, csr, d, fb,& + fc, s1, s2, snl, snr, ua11r, ua22r, vb11r, vb22r + complex(dp) :: b, c, d1, r, t, ua11, ua12, ua21, ua22, vb11, vb12, vb21, vb22 + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( t ) = abs( real( t,KIND=dp) ) + abs( aimag( t ) ) + ! Executable Statements + if( upper ) then + ! input matrices a and b are upper triangular matrices + ! form matrix c = a*adj(b) = ( a b ) + ! ( 0 d ) + a = a1*b3 + d = a3*b1 + b = a2*b1 - a1*b2 + fb = abs( b ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(1,d1). + d1 = one + if( fb/=zero )d1 = b / fb + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a b )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( 0 d ) ( -snr csr ) ( 0 t ) + call stdlib_dlasv2( a, fb, d, s1, s2, snr, csr, snl, csl ) + if( abs( csl )>=abs( snl ) .or. abs( csr )>=abs( snr ) )then + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,2) element of |u|**h *|a| and |v|**h *|b|. + ua11r = csl*a1 + ua12 = csl*a2 + d1*snl*a3 + vb11r = csr*b1 + vb12 = csr*b2 + d1*snr*b3 + aua12 = abs( csl )*abs1( a2 ) + abs( snl )*abs( a3 ) + avb12 = abs( csr )*abs1( b2 ) + abs( snr )*abs( b3 ) + ! zero (1,2) elements of u**h *a and v**h *b + if( ( abs( ua11r )+abs1( ua12 ) )==zero ) then + call stdlib_zlartg( -cmplx( vb11r,KIND=dp), conjg( vb12 ), csq, snq,r ) + + else if( ( abs( vb11r )+abs1( vb12 ) )==zero ) then + call stdlib_zlartg( -cmplx( ua11r,KIND=dp), conjg( ua12 ), csq, snq,r ) + + else if( aua12 / ( abs( ua11r )+abs1( ua12 ) )<=avb12 /( abs( vb11r )+abs1( vb12 & + ) ) ) then + call stdlib_zlartg( -cmplx( ua11r,KIND=dp), conjg( ua12 ), csq, snq,r ) + + else + call stdlib_zlartg( -cmplx( vb11r,KIND=dp), conjg( vb12 ), csq, snq,r ) + + end if + csu = csl + snu = -d1*snl + csv = csr + snv = -d1*snr + else + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,2) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -conjg( d1 )*snl*a1 + ua22 = -conjg( d1 )*snl*a2 + csl*a3 + vb21 = -conjg( d1 )*snr*b1 + vb22 = -conjg( d1 )*snr*b2 + csr*b3 + aua22 = abs( snl )*abs1( a2 ) + abs( csl )*abs( a3 ) + avb22 = abs( snr )*abs1( b2 ) + abs( csr )*abs( b3 ) + ! zero (2,2) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua21 )+abs1( ua22 ) )==zero ) then + call stdlib_zlartg( -conjg( vb21 ), conjg( vb22 ), csq, snq,r ) + else if( ( abs1( vb21 )+abs( vb22 ) )==zero ) then + call stdlib_zlartg( -conjg( ua21 ), conjg( ua22 ), csq, snq,r ) + else if( aua22 / ( abs1( ua21 )+abs1( ua22 ) )<=avb22 /( abs1( vb21 )+abs1( vb22 & + ) ) ) then + call stdlib_zlartg( -conjg( ua21 ), conjg( ua22 ), csq, snq,r ) + else + call stdlib_zlartg( -conjg( vb21 ), conjg( vb22 ), csq, snq,r ) + end if + csu = snl + snu = d1*csl + csv = snr + snv = d1*csr + end if + else + ! input matrices a and b are lower triangular matrices + ! form matrix c = a*adj(b) = ( a 0 ) + ! ( c d ) + a = a1*b3 + d = a3*b1 + c = a2*b3 - a3*b2 + fc = abs( c ) + ! transform complex 2-by-2 matrix c to real matrix by unitary + ! diagonal matrix diag(d1,1). + d1 = one + if( fc/=zero )d1 = c / fc + ! the svd of real 2 by 2 triangular c + ! ( csl -snl )*( a 0 )*( csr snr ) = ( r 0 ) + ! ( snl csl ) ( c d ) ( -snr csr ) ( 0 t ) + call stdlib_dlasv2( a, fc, d, s1, s2, snr, csr, snl, csl ) + if( abs( csr )>=abs( snr ) .or. abs( csl )>=abs( snl ) )then + ! compute the (2,1) and (2,2) elements of u**h *a and v**h *b, + ! and (2,1) element of |u|**h *|a| and |v|**h *|b|. + ua21 = -d1*snr*a1 + csr*a2 + ua22r = csr*a3 + vb21 = -d1*snl*b1 + csl*b2 + vb22r = csl*b3 + aua21 = abs( snr )*abs( a1 ) + abs( csr )*abs1( a2 ) + avb21 = abs( snl )*abs( b1 ) + abs( csl )*abs1( b2 ) + ! zero (2,1) elements of u**h *a and v**h *b. + if( ( abs1( ua21 )+abs( ua22r ) )==zero ) then + call stdlib_zlartg( cmplx( vb22r,KIND=dp), vb21, csq, snq, r ) + else if( ( abs1( vb21 )+abs( vb22r ) )==zero ) then + call stdlib_zlartg( cmplx( ua22r,KIND=dp), ua21, csq, snq, r ) + else if( aua21 / ( abs1( ua21 )+abs( ua22r ) )<=avb21 /( abs1( vb21 )+abs( vb22r & + ) ) ) then + call stdlib_zlartg( cmplx( ua22r,KIND=dp), ua21, csq, snq, r ) + else + call stdlib_zlartg( cmplx( vb22r,KIND=dp), vb21, csq, snq, r ) + end if + csu = csr + snu = -conjg( d1 )*snr + csv = csl + snv = -conjg( d1 )*snl + else + ! compute the (1,1) and (1,2) elements of u**h *a and v**h *b, + ! and (1,1) element of |u|**h *|a| and |v|**h *|b|. + ua11 = csr*a1 + conjg( d1 )*snr*a2 + ua12 = conjg( d1 )*snr*a3 + vb11 = csl*b1 + conjg( d1 )*snl*b2 + vb12 = conjg( d1 )*snl*b3 + aua11 = abs( csr )*abs( a1 ) + abs( snr )*abs1( a2 ) + avb11 = abs( csl )*abs( b1 ) + abs( snl )*abs1( b2 ) + ! zero (1,1) elements of u**h *a and v**h *b, and then swap. + if( ( abs1( ua11 )+abs1( ua12 ) )==zero ) then + call stdlib_zlartg( vb12, vb11, csq, snq, r ) + else if( ( abs1( vb11 )+abs1( vb12 ) )==zero ) then + call stdlib_zlartg( ua12, ua11, csq, snq, r ) + else if( aua11 / ( abs1( ua11 )+abs1( ua12 ) )<=avb11 /( abs1( vb11 )+abs1( vb12 & + ) ) ) then + call stdlib_zlartg( ua12, ua11, csq, snq, r ) + else + call stdlib_zlartg( vb12, vb11, csq, snq, r ) + end if + csu = snr + snu = conjg( d1 )*csr + csv = snl + snv = conjg( d1 )*csl + end if + end if + return + end subroutine stdlib_zlags2 + + !> ZLAHQR: is an auxiliary routine called by CHSEQR to update the + !> eigenvalues and Schur decomposition already computed by CHSEQR, by + !> dealing with the Hessenberg submatrix in rows and columns ILO to + !> IHI. + + pure subroutine stdlib_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & + ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: w(*) + ! ========================================================= + ! Parameters + real(dp), parameter :: rzero = 0.0_dp + real(dp), parameter :: rone = 1.0_dp + real(dp), parameter :: dat1 = 3.0_dp/4.0_dp + integer(ilp), parameter :: kexsh = 10 + + + + + ! Local Scalars + complex(dp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y + real(dp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & + ulp + integer(ilp) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl + ! Local Arrays + complex(dp) :: v(2) + ! Statement Functions + real(dp) :: cabs1 + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,sqrt + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! quick return if possible + if( n==0 )return + if( ilo==ihi ) then + w( ilo ) = h( ilo, ilo ) + return + end if + ! ==== clear out the trash ==== + do j = ilo, ihi - 3 + h( j+2, j ) = czero + h( j+3, j ) = czero + end do + if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero + ! ==== ensure that subdiagonal entries are real ==== + if( wantt ) then + jlo = 1 + jhi = n + else + jlo = ilo + jhi = ihi + end if + do i = ilo + 1, ihi + if( aimag( h( i, i-1 ) )/=rzero ) then + ! ==== the following redundant normalization + ! . avoids problems with both gradual and + ! . sudden underflow in abs(h(i,i-1)) ==== + sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) + sc = conjg( sc ) / abs( sc ) + h( i, i-1 ) = abs( h( i, i-1 ) ) + call stdlib_zscal( jhi-i+1, sc, h( i, i ), ldh ) + call stdlib_zscal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1 ) + if( wantz )call stdlib_zscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1 ) + end if + end do + nh = ihi - ilo + 1 + nz = ihiz - iloz + 1 + ! set machine-dependent constants for the stopping criterion. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( nh,KIND=dp) / ulp ) + ! i1 and i2 are the indices of the first row and last column of h + ! to which transformations must be applied. if eigenvalues only are + ! being computed, i1 and i2 are set inside the main loop. + if( wantt ) then + i1 = 1 + i2 = n + end if + ! itmax is the total number of qr iterations allowed. + itmax = 30 * max( 10, nh ) + ! kdefl counts the number of iterations since a deflation + kdefl = 0 + ! the main loop begins here. i is the loop index and decreases from + ! ihi to ilo in steps of 1. each iteration of the loop works + ! with the active submatrix in rows and columns l to i. + ! eigenvalues i+1 to ihi have already converged. either l = ilo, or + ! h(l,l-1) is negligible so that the matrix splits. + i = ihi + 30 continue + if( i=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=dp) ) + if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=dp) ) + end if + ! ==== the following is a conservative small subdiagonal + ! . deflation criterion due to ahues + ! . 1997). it has better mathematical foundation and + ! . improves accuracy in some examples. ==== + if( abs( real( h( k, k-1 ),KIND=dp) )<=ulp*tst ) then + ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) + aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) + s = aa + ab + if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 + end if + end do + 50 continue + l = k + if( l>ilo ) then + ! h(l,l-1) is negligible + h( l, l-1 ) = czero + end if + ! exit from loop if a submatrix of order 1 has split off. + if( l>=i )go to 140 + kdefl = kdefl + 1 + ! now the active submatrix is in rows and columns l to i. if + ! eigenvalues only are being computed, only the active submatrix + ! need be transformed. + if( .not.wantt ) then + i1 = l + i2 = i + end if + if( mod(kdefl,2*kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( i, i-1 ),KIND=dp) ) + t = s + h( i, i ) + else if( mod(kdefl,kexsh)==0 ) then + ! exceptional shift. + s = dat1*abs( real( h( l+1, l ),KIND=dp) ) + t = s + h( l, l ) + else + ! wilkinson's shift. + t = h( i, i ) + u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) + s = cabs1( u ) + if( s/=rzero ) then + x = half*( h( i-1, i-1 )-t ) + sx = cabs1( x ) + s = max( s, cabs1( x ) ) + y = s*sqrt( ( x / s )**2+( u / s )**2 ) + if( sx>rzero ) then + if( real( x / sx,KIND=dp)*real( y,KIND=dp)+aimag( x / sx )*aimag( y )& + m )call stdlib_zcopy( 2, h( k, k-1 ), 1, v, 1 ) + call stdlib_zlarfg( 2, v( 1 ), v( 2 ), 1, t1 ) + if( k>m ) then + h( k, k-1 ) = v( 1 ) + h( k+1, k-1 ) = czero + end if + v2 = v( 2 ) + t2 = real( t1*v2,KIND=dp) + ! apply g from the left to transform the rows of the matrix + ! in columns k to i2. + do j = k, i2 + sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) + h( k, j ) = h( k, j ) - sum + h( k+1, j ) = h( k+1, j ) - sum*v2 + end do + ! apply g from the right to transform the columns of the + ! matrix in rows i1 to min(k+2,i). + do j = i1, min( k+2, i ) + sum = t1*h( j, k ) + t2*h( j, k+1 ) + h( j, k ) = h( j, k ) - sum + h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) + end do + if( wantz ) then + ! accumulate transformations in the matrix z + do j = iloz, ihiz + sum = t1*z( j, k ) + t2*z( j, k+1 ) + z( j, k ) = z( j, k ) - sum + z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) + end do + end if + if( k==m .and. m>l ) then + ! if the qr step was started at row m > l because two + ! consecutive small subdiagonals were found, then extra + ! scaling must be performed to ensure that h(m,m-1) remains + ! real. + temp = cone - t1 + temp = temp / abs( temp ) + h( m+1, m ) = h( m+1, m )*conjg( temp ) + if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp + do j = m, i + if( j/=m+1 ) then + if( i2>j )call stdlib_zscal( i2-j, temp, h( j, j+1 ), ldh ) + call stdlib_zscal( j-i1, conjg( temp ), h( i1, j ), 1 ) + if( wantz ) then + call stdlib_zscal( nz, conjg( temp ), z( iloz, j ),1 ) + end if + end if + end do + end if + end do loop_120 + ! ensure that h(i,i-1) is real. + temp = h( i, i-1 ) + if( aimag( temp )/=rzero ) then + rtemp = abs( temp ) + h( i, i-1 ) = rtemp + temp = temp / rtemp + if( i2>i )call stdlib_zscal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) + call stdlib_zscal( i-i1, temp, h( i1, i ), 1 ) + if( wantz ) then + call stdlib_zscal( nz, temp, z( iloz, i ), 1 ) + end if + end if + end do loop_130 + ! failure to converge in remaining number of iterations + info = i + return + 140 continue + ! h(i,i-1) is negligible: cone eigenvalue has converged. + w( i ) = h( i, i ) + ! reset deflation counter + kdefl = 0 + ! return to start of the main loop with new value of i. + i = l - 1 + go to 30 + 150 continue + return + end subroutine stdlib_zlahqr + + !> ZLAHR2: reduces the first NB columns of A complex general n-BY-(n-k+1) + !> matrix A so that elements below the k-th subdiagonal are zero. The + !> reduction is performed by an unitary similarity transformation + !> Q**H * A * Q. The routine returns the matrices V and T which determine + !> Q as a block reflector I - V*T*V**H, and also the matrix Y = A * V * T. + !> This is an auxiliary routine called by ZGEHRD. + + pure subroutine stdlib_zlahr2( n, k, nb, a, lda, tau, t, ldt, y, ldy ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: k, lda, ldt, ldy, n, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,nb), tau(nb), y(ldy,nb) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + complex(dp) :: ei + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! quick return if possible + if( n<=1 )return + loop_10: do i = 1, nb + if( i>1 ) then + ! update a(k+1:n,i) + ! update i-th column of a - y * v**h + call stdlib_zlacgv( i-1, a( k+i-1, 1 ), lda ) + call stdlib_zgemv( 'NO TRANSPOSE', n-k, i-1, -cone, y(k+1,1), ldy,a( k+i-1, 1 ), & + lda, cone, a( k+1, i ), 1 ) + call stdlib_zlacgv( i-1, a( k+i-1, 1 ), lda ) + ! apply i - v * t**h * v**h to this column (call it b) from the + ! left, using the last column of t as workspace + ! let v = ( v1 ) and b = ( b1 ) (first i-1 rows) + ! ( v2 ) ( b2 ) + ! where v1 is unit lower triangular + ! w := v1**h * b1 + call stdlib_zcopy( i-1, a( k+1, i ), 1, t( 1, nb ), 1 ) + call stdlib_ztrmv( 'LOWER', 'CONJUGATE TRANSPOSE', 'UNIT',i-1, a( k+1, 1 ),lda, & + t( 1, nb ), 1 ) + ! w := w + v2**h * b2 + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ),lda, a( & + k+i, i ), 1, cone, t( 1, nb ), 1 ) + ! w := t**h * w + call stdlib_ztrmv( 'UPPER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, & + nb ), 1 ) + ! b2 := b2 - v2*w + call stdlib_zgemv( 'NO TRANSPOSE', n-k-i+1, i-1, -cone,a( k+i, 1 ),lda, t( 1, nb & + ), 1, cone, a( k+i, i ), 1 ) + ! b1 := b1 - v1*w + call stdlib_ztrmv( 'LOWER', 'NO TRANSPOSE','UNIT', i-1,a( k+1, 1 ), lda, t( 1, & + nb ), 1 ) + call stdlib_zaxpy( i-1, -cone, t( 1, nb ), 1, a( k+1, i ), 1 ) + a( k+i-1, i-1 ) = ei + end if + ! generate the elementary reflector h(i) to annihilate + ! a(k+i+1:n,i) + call stdlib_zlarfg( n-k-i+1, a( k+i, i ), a( min( k+i+1, n ), i ), 1,tau( i ) ) + + ei = a( k+i, i ) + a( k+i, i ) = cone + ! compute y(k+1:n,i) + call stdlib_zgemv( 'NO TRANSPOSE', n-k, n-k-i+1,cone, a( k+1, i+1 ),lda, a( k+i, i )& + , 1, czero, y( k+1, i ), 1 ) + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', n-k-i+1, i-1,cone, a( k+i, 1 ), lda,a( k+& + i, i ), 1, czero, t( 1, i ), 1 ) + call stdlib_zgemv( 'NO TRANSPOSE', n-k, i-1, -cone,y( k+1, 1 ), ldy,t( 1, i ), 1, & + cone, y( k+1, i ), 1 ) + call stdlib_zscal( n-k, tau( i ), y( k+1, i ), 1 ) + ! compute t(1:i,i) + call stdlib_zscal( i-1, -tau( i ), t( 1, i ), 1 ) + call stdlib_ztrmv( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',i-1, t, ldt,t( 1, i ), 1 ) + + t( i, i ) = tau( i ) + end do loop_10 + a( k+nb, nb ) = ei + ! compute y(1:k,1:nb) + call stdlib_zlacpy( 'ALL', k, nb, a( 1, 2 ), lda, y, ldy ) + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE','UNIT', k, nb,cone, a( k+1, 1 ), & + lda, y, ldy ) + if( n>k+nb )call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', k,nb, n-k-nb, cone,a( 1,& + 2+nb ), lda, a( k+1+nb, 1 ), lda, cone, y,ldy ) + call stdlib_ztrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE','NON-UNIT', k, nb,cone, t, ldt, y, & + ldy ) + return + end subroutine stdlib_zlahr2 + + !> ZLALS0: applies back the multiplying factors of either the left or the + !> right singular vector matrix of a diagonal matrix appended by a row + !> to the right hand side matrix B in solving the least squares problem + !> using the divide-and-conquer SVD approach. + !> For the left singular vector matrix, three types of orthogonal + !> matrices are involved: + !> (1L) Givens rotations: the number of such rotations is GIVPTR; the + !> pairs of columns/rows they were applied to are stored in GIVCOL; + !> and the C- and S-values of these rotations are stored in GIVNUM. + !> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + !> row, and for J=2:N, PERM(J)-th row of B is to be moved to the + !> J-th row. + !> (3L) The left singular vector matrix of the remaining matrix. + !> For the right singular vector matrix, four types of orthogonal + !> matrices are involved: + !> (1R) The right singular vector matrix of the remaining matrix. + !> (2R) If SQRE = 1, one extra Givens rotation to generate the right + !> null space. + !> (3R) The inverse transformation of (2L). + !> (4R) The inverse transformation of (1L). + + pure subroutine stdlib_zlals0( icompq, nl, nr, sqre, nrhs, b, ldb, bx, ldbx,perm, givptr, & + givcol, ldgcol, givnum, ldgnum,poles, difl, difr, z, k, c, s, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: givptr, icompq, k, ldb, ldbx, ldgcol, ldgnum, nl, nr, nrhs,& + sqre + integer(ilp), intent(out) :: info + real(dp), intent(in) :: c, s + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), perm(*) + real(dp), intent(in) :: difl(*), difr(ldgnum,*), givnum(ldgnum,*), poles(ldgnum,*), z(& + *) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, jcol, jrow, m, n, nlp1 + real(dp) :: diflj, difrj, dj, dsigj, dsigjp, temp + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag,max + ! Executable Statements + ! test the input parameters. + info = 0 + n = nl + nr + 1 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( nl<1 ) then + info = -2 + else if( nr<1 ) then + info = -3 + else if( ( sqre<0 ) .or. ( sqre>1 ) ) then + info = -4 + else if( nrhs<1 ) then + info = -5 + else if( ldb ZLALSA: is an itermediate step in solving the least squares problem + !> by computing the SVD of the coefficient matrix in compact form (The + !> singular vectors are computed as products of simple orthorgonal + !> matrices.). + !> If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector + !> matrix of an upper bidiagonal matrix to the right hand side; and if + !> ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the + !> right hand side. The singular vector matrices were generated in + !> compact form by ZLALSA. + + pure subroutine stdlib_zlalsa( icompq, smlsiz, n, nrhs, b, ldb, bx, ldbx, u,ldu, vt, k, difl,& + difr, z, poles, givptr,givcol, ldgcol, perm, givnum, c, s, rwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: icompq, ldb, ldbx, ldgcol, ldu, n, nrhs, smlsiz + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(in) :: givcol(ldgcol,*), givptr(*), k(*), perm(ldgcol,*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(in) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), s(& + *), u(ldu,*), vt(ldu,*), z(ldu,*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: bx(ldbx,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, i1, ic, im1, inode, j, jcol, jimag, jreal, jrow, lf, ll, lvl, lvl2, & + nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqre + ! Intrinsic Functions + intrinsic :: real,cmplx,aimag + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( icompq<0 ) .or. ( icompq>1 ) ) then + info = -1 + else if( smlsiz<3 ) then + info = -2 + else if( n ZLALSD: uses the singular value decomposition of A to solve the least + !> squares problem of finding X to minimize the Euclidean norm of each + !> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + !> are N-by-NRHS. The solution X overwrites B. + !> The singular values of A smaller than RCOND times the largest + !> singular value are treated as zero in solving the least squares + !> problem; in this case a minimum norm solution is returned. + !> The actual singular values are returned in D in ascending order. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_zlalsd( uplo, smlsiz, n, nrhs, d, e, b, ldb, rcond,rank, work, rwork, & + iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: ldb, n, nrhs, smlsiz + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: bx, bxst, c, difl, difr, givcol, givnum, givptr, i, icmpq1, icmpq2, & + irwb, irwib, irwrb, irwu, irwvt, irwwrk, iwk, j, jcol, jimag, jreal, jrow, k, nlvl, & + nm1, nrwork, nsize, nsub, perm, poles, s, sizei, smlszp, sqre, st, st1, u, vt, & + z + real(dp) :: cs, eps, orgnrm, rcnd, r, sn, tol + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,log,sign + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -3 + else if( nrhs<1 ) then + info = -4 + else if( ( ldb<1 ) .or. ( ldb=one ) ) then + rcnd = eps + else + rcnd = rcond + end if + rank = 0 + ! quick return if possible. + if( n==0 ) then + return + else if( n==1 ) then + if( d( 1 )==zero ) then + call stdlib_zlaset( 'A', 1, nrhs, czero, czero, b, ldb ) + else + rank = 1 + call stdlib_zlascl( 'G', 0, 0, d( 1 ), one, 1, nrhs, b, ldb, info ) + d( 1 ) = abs( d( 1 ) ) + end if + return + end if + ! rotate the matrix if it is lower bidiagonal. + if( uplo=='L' ) then + do i = 1, n - 1 + call stdlib_dlartg( d( i ), e( i ), cs, sn, r ) + d( i ) = r + e( i ) = sn*d( i+1 ) + d( i+1 ) = cs*d( i+1 ) + if( nrhs==1 ) then + call stdlib_zdrot( 1, b( i, 1 ), 1, b( i+1, 1 ), 1, cs, sn ) + else + rwork( i*2-1 ) = cs + rwork( i*2 ) = sn + end if + end do + if( nrhs>1 ) then + do i = 1, nrhs + do j = 1, n - 1 + cs = rwork( j*2-1 ) + sn = rwork( j*2 ) + call stdlib_zdrot( 1, b( j, i ), 1, b( j+1, i ), 1, cs, sn ) + end do + end do + end if + end if + ! scale. + nm1 = n - 1 + orgnrm = stdlib_dlanst( 'M', n, d, e ) + if( orgnrm==zero ) then + call stdlib_zlaset( 'A', n, nrhs, czero, czero, b, ldb ) + return + end if + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, n, 1, d, n, info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, nm1, 1, e, nm1, info ) + ! if n is smaller than the minimum divide size smlsiz, then solve + ! the problem with another solver. + if( n<=smlsiz ) then + irwu = 1 + irwvt = irwu + n*n + irwwrk = irwvt + n*n + irwrb = irwwrk + irwib = irwrb + n*nrhs + irwb = irwib + n*nrhs + call stdlib_dlaset( 'A', n, n, zero, one, rwork( irwu ), n ) + call stdlib_dlaset( 'A', n, n, zero, one, rwork( irwvt ), n ) + call stdlib_dlasdq( 'U', 0, n, n, n, 0, d, e, rwork( irwvt ), n,rwork( irwu ), n, & + rwork( irwwrk ), 1,rwork( irwwrk ), info ) + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_dlasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=dp) + end do + end do + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, rwork( irwu ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=dp) + end do + end do + tol = rcnd*abs( d( stdlib_idamax( n, d, 1 ) ) ) + do i = 1, n + if( d( i )<=tol ) then + call stdlib_zlaset( 'A', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + else + call stdlib_zlascl( 'G', 0, 0, d( i ), one, 1, nrhs, b( i, 1 ),ldb, info ) + + rank = rank + 1 + end if + end do + ! since b is complex, the following call to stdlib_dgemm is performed + ! in two steps (real and imaginary parts). that is for v * b + ! (in the real version of the code v**h is stored in work). + ! call stdlib_dgemm( 't', 'n', n, nrhs, n, one, work, n, b, ldb, zero, + ! $ work( nwork ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=dp) + end do + end do + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwrb ), n ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = 1, n + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_dgemm( 'T', 'N', n, nrhs, n, one, rwork( irwvt ), n,rwork( irwb ), n, & + zero, rwork( irwib ), n ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = 1, n + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=dp) + end do + end do + ! unscale. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_dlasrt( 'D', n, d, info ) + call stdlib_zlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end if + ! book-keeping and setting up some constants. + nlvl = int( log( real( n,KIND=dp) / real( smlsiz+1,KIND=dp) ) / log( two ),KIND=ilp) + & + 1 + smlszp = smlsiz + 1 + u = 1 + vt = 1 + smlsiz*n + difl = vt + smlszp*n + difr = difl + nlvl*n + z = difr + nlvl*n*2 + c = z + nlvl*n + s = c + n + poles = s + n + givnum = poles + 2*nlvl*n + nrwork = givnum + 2*nlvl*n + bx = 1 + irwrb = nrwork + irwib = irwrb + smlsiz*nrhs + irwb = irwib + smlsiz*nrhs + sizei = 1 + n + k = sizei + n + givptr = k + n + perm = givptr + n + givcol = perm + nlvl*n + iwk = givcol + nlvl*n*2 + st = 1 + sqre = 0 + icmpq1 = 1 + icmpq2 = 0 + nsub = 0 + do i = 1, n + if( abs( d( i ) )=eps ) then + ! a subproblem with e(nm1) not too small but i = nm1. + nsize = n - st + 1 + iwork( sizei+nsub-1 ) = nsize + else + ! a subproblem with e(nm1) small. this implies an + ! 1-by-1 subproblem at d(n), which is not solved + ! explicitly. + nsize = i - st + 1 + iwork( sizei+nsub-1 ) = nsize + nsub = nsub + 1 + iwork( nsub ) = n + iwork( sizei+nsub-1 ) = 1 + call stdlib_zcopy( nrhs, b( n, 1 ), ldb, work( bx+nm1 ), n ) + end if + st1 = st - 1 + if( nsize==1 ) then + ! this is a 1-by-1 subproblem and is not solved + ! explicitly. + call stdlib_zcopy( nrhs, b( st, 1 ), ldb, work( bx+st1 ), n ) + else if( nsize<=smlsiz ) then + ! this is a small subproblem and is solved by stdlib_dlasdq. + call stdlib_dlaset( 'A', nsize, nsize, zero, one,rwork( vt+st1 ), n ) + call stdlib_dlaset( 'A', nsize, nsize, zero, one,rwork( u+st1 ), n ) + call stdlib_dlasdq( 'U', 0, nsize, nsize, nsize, 0, d( st ),e( st ), rwork( & + vt+st1 ), n, rwork( u+st1 ),n, rwork( nrwork ), 1, rwork( nrwork ),info ) + + if( info/=0 ) then + return + end if + ! in the real version, b is passed to stdlib_dlasdq and multiplied + ! internally by q**h. here b is complex and that product is + ! computed below in two steps (real and imaginary parts). + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = real( b( jrow, jcol ),KIND=dp) + end do + end do + call stdlib_dgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwrb ), nsize ) + j = irwb - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + j = j + 1 + rwork( j ) = aimag( b( jrow, jcol ) ) + end do + end do + call stdlib_dgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( u+st1 ), n, rwork(& + irwb ), nsize,zero, rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=dp) + end do + end do + call stdlib_zlacpy( 'A', nsize, nrhs, b( st, 1 ), ldb,work( bx+st1 ), n ) + + else + ! a large problem. solve it using divide and conquer. + call stdlib_dlasda( icmpq1, smlsiz, nsize, sqre, d( st ),e( st ), rwork( u+& + st1 ), n, rwork( vt+st1 ),iwork( k+st1 ), rwork( difl+st1 ),rwork( difr+st1 ),& + rwork( z+st1 ),rwork( poles+st1 ), iwork( givptr+st1 ),iwork( givcol+st1 ), & + n, iwork( perm+st1 ),rwork( givnum+st1 ), rwork( c+st1 ),rwork( s+st1 ), & + rwork( nrwork ),iwork( iwk ), info ) + if( info/=0 ) then + return + end if + bxst = bx + st1 + call stdlib_zlalsa( icmpq2, smlsiz, nsize, nrhs, b( st, 1 ),ldb, work( bxst ),& + n, rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), & + rwork( difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), & + iwork( givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ),& + rwork( s+st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + st = i + 1 + end if + end do loop_240 + ! apply the singular values and treat the tiny ones as zero. + tol = rcnd*abs( d( stdlib_idamax( n, d, 1 ) ) ) + do i = 1, n + ! some of the elements in d can be negative because 1-by-1 + ! subproblems were not solved explicitly. + if( abs( d( i ) )<=tol ) then + call stdlib_zlaset( 'A', 1, nrhs, czero, czero, work( bx+i-1 ), n ) + else + rank = rank + 1 + call stdlib_zlascl( 'G', 0, 0, d( i ), one, 1, nrhs,work( bx+i-1 ), n, info ) + + end if + d( i ) = abs( d( i ) ) + end do + ! now apply back the right singular vectors. + icmpq2 = 1 + loop_320: do i = 1, nsub + st = iwork( i ) + st1 = st - 1 + nsize = iwork( sizei+i-1 ) + bxst = bx + st1 + if( nsize==1 ) then + call stdlib_zcopy( nrhs, work( bxst ), n, b( st, 1 ), ldb ) + else if( nsize<=smlsiz ) then + ! since b and bx are complex, the following call to stdlib_dgemm + ! is performed in two steps (real and imaginary parts). + ! call stdlib_dgemm( 't', 'n', nsize, nrhs, nsize, one, + ! $ rwork( vt+st1 ), n, rwork( bxst ), n, zero, + ! $ b( st, 1 ), ldb ) + j = bxst - n - 1 + jreal = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jreal = jreal + 1 + rwork( jreal ) = real( work( j+jrow ),KIND=dp) + end do + end do + call stdlib_dgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwrb ), nsize ) + j = bxst - n - 1 + jimag = irwb - 1 + do jcol = 1, nrhs + j = j + n + do jrow = 1, nsize + jimag = jimag + 1 + rwork( jimag ) = aimag( work( j+jrow ) ) + end do + end do + call stdlib_dgemm( 'T', 'N', nsize, nrhs, nsize, one,rwork( vt+st1 ), n, rwork( & + irwb ), nsize, zero,rwork( irwib ), nsize ) + jreal = irwrb - 1 + jimag = irwib - 1 + do jcol = 1, nrhs + do jrow = st, st + nsize - 1 + jreal = jreal + 1 + jimag = jimag + 1 + b( jrow, jcol ) = cmplx( rwork( jreal ),rwork( jimag ),KIND=dp) + end do + end do + else + call stdlib_zlalsa( icmpq2, smlsiz, nsize, nrhs, work( bxst ), n,b( st, 1 ), ldb,& + rwork( u+st1 ), n,rwork( vt+st1 ), iwork( k+st1 ),rwork( difl+st1 ), rwork( & + difr+st1 ),rwork( z+st1 ), rwork( poles+st1 ),iwork( givptr+st1 ), iwork( & + givcol+st1 ), n,iwork( perm+st1 ), rwork( givnum+st1 ),rwork( c+st1 ), rwork( s+& + st1 ),rwork( nrwork ), iwork( iwk ), info ) + if( info/=0 ) then + return + end if + end if + end do loop_320 + ! unscale and sort the singular values. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, n, 1, d, n, info ) + call stdlib_dlasrt( 'D', n, d, info ) + call stdlib_zlascl( 'G', 0, 0, orgnrm, one, n, nrhs, b, ldb, info ) + return + end subroutine stdlib_zlalsd + + !> ZLANGB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. + + real(dp) function stdlib_zlangb( norm, n, kl, ku, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: kl, ku, ldab, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k, l + real(dp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + temp = abs( ab( i, j ) ) + if( value ZLANGE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex matrix A. + + real(dp) function stdlib_zlange( norm, m, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: scale, sum, value, temp + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, m + temp = abs( a( i, j ) ) + if( value ZLANGT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex tridiagonal matrix A. + + pure real(dp) function stdlib_zlangt( norm, n, dl, d, du ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(in) :: d(*), dl(*), du(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: anorm, scale, sum, temp + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + if( anorm1 ) then + call stdlib_zlassq( n-1, dl, 1, scale, sum ) + call stdlib_zlassq( n-1, du, 1, scale, sum ) + end if + anorm = scale*sqrt( sum ) + end if + stdlib_zlangt = anorm + return + end function stdlib_zlangt + + !> ZLANHB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n hermitian band matrix A, with k super-diagonals. + + real(dp) function stdlib_zlanhb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + sum = abs( real( ab( k+1, j ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( ab( 1, j ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( ab( k+1, j ),KIND=dp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ab( 1, j ),KIND=dp) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_zlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + do j = 1, n + if( real( ab( l, j ),KIND=dp)/=zero ) then + absa = abs( real( ab( l, j ),KIND=dp) ) + if( scale ZLANHE: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A. + + real(dp) function stdlib_zlanhe( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j - 1 + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + sum = abs( real( a( j, j ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + sum = abs( real( a( j, j ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + do i = j + 1, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( real( a( j, j ),KIND=dp) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( a( j, j ),KIND=dp) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_zlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + do i = 1, n + if( real( a( i, i ),KIND=dp)/=zero ) then + absa = abs( real( a( i, i ),KIND=dp) ) + if( scale ZLANHF: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian matrix A in RFP format. + + real(dp) function stdlib_zlanhf( norm, transr, uplo, n, a, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, transr, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: work(0:*) + complex(dp), intent(in) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, ifm, ilu, noe, n1, k, l, lda + real(dp) :: scale, s, value, aa, temp + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + stdlib_zlanhf = zero + return + else if( n==1 ) then + stdlib_zlanhf = abs(real(a(0),KIND=dp)) + return + end if + ! set noe = 1 if n is odd. if n is even set noe=0 + noe = 1 + if( mod( n, 2 )==0 )noe = 0 + ! set ifm = 0 when form='c' or 'c' and 1 otherwise + ifm = 1 + if( stdlib_lsame( transr, 'C' ) )ifm = 0 + ! set ilu = 0 when uplo='u or 'u' and 1 otherwise + ilu = 1 + if( stdlib_lsame( uplo, 'U' ) )ilu = 0 + ! set lda = (n+1)/2 when ifm = 0 + ! set lda = n when ifm = 1 and noe = 1 + ! set lda = n+1 when ifm = 1 and noe = 0 + if( ifm==1 ) then + if( noe==1 ) then + lda = n + else + ! noe=0 + lda = n + 1 + end if + else + ! ifm=0 + lda = ( n+1 ) / 2 + end if + if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = ( n+1 ) / 2 + value = zero + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is n by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(0,0) + temp = abs( real( a( j+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j - 1 + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = k + j - 1 + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = i + 1 + ! =k+j; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = k + j + 1, n - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n + if( ilu==1 ) then + ! uplo ='l' + do j = 0, k - 2 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j + 1 + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + j = k - 1 + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do j = k, n - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + j = k - 1 + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + do j = k, n - 1 + do i = 0, j - k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j - k + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j - k + 1 + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j - k + 2, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + end if + end if + else + ! n is even + if( ifm==1 ) then + ! a is n+1 by k + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) + temp = abs( real( a( j+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + temp = abs( real( a( j+1+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j + ! l(k+j,k+j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j + 1 + ! -> l(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 2 + do i = 0, k + j - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = k + j + ! -> u(i,i) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = i + 1 + ! =k+j+1; i -> u(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = k + j + 2, n + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + do i = 0, n - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + ! j=k-1 + end do + ! i=n-1 -> u(n-1,n-1) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = n + ! -> u(k-1,k-1) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end if + else + ! xpose case; a is k by n+1 + if( ilu==1 ) then + ! uplo ='l' + j = 0 + ! -> l(k,k) at a(0,0) + temp = abs( real( a( j+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + do j = 1, k - 1 + do i = 0, j - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j - 1 + ! l(i,i) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j + ! l(j+k,j+k) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + j = k + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = k - 1 + ! -> l(i,i) is at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do j = k + 1, n + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + else + ! uplo = 'u' + do j = 0, k - 1 + do i = 0, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + j = k + ! -> u(j,j) is at a(0,j) + temp = abs( real( a( 0+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + do j = k + 1, n - 1 + do i = 0, j - k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = j - k - 1 + ! -> u(i,i) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + i = j - k + ! u(j,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + do i = j - k + 1, k - 1 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end do + j = n + do i = 0, k - 2 + temp = abs( a( i+j*lda ) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + i = k - 1 + ! u(k,k) at a(i,j) + temp = abs( real( a( i+j*lda ),KIND=dp) ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end if + end if + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + if( ifm==1 ) then + ! a is 'n' + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + if( i==k+k )go to 10 + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + 10 continue + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu = 1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + if( j>0 ) then + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + end if + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = 0, k - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k + j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(i,j+k) + s = s + aa + work( i ) = work( i ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j+k,j+k) + work( j+k ) = s + aa + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j,j) + work( j ) = work( j ) + aa + s = zero + do l = j + 1, k - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu = 1 + do i = k, n - 1 + work( i ) = zero + end do + do j = k - 1, 0, -1 + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! -> a(j+k,i+k) + s = s + aa + work( i+k ) = work( i+k ) + aa + end do + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j+k,j+k) + s = s + aa + work( i+k ) = work( i+k ) + s + ! i=j + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! -> a(j,j) + work( j ) = aa + s = zero + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! -> a(l,j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + end if + else + ! ifm=0 + k = n / 2 + if( noe==1 ) then + ! n is odd + if( ilu==0 ) then + ! uplo = 'u' + n1 = k + ! n/2 + k = k + 1 + ! k is the row size and lda + do i = n1, n - 1 + work( i ) = zero + end do + do j = 0, n1 - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,n1+i) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=n1=k-1 is special + s = abs( real( a( 0+j*lda ),KIND=dp) ) + ! a(k-1,k-1) + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k-1,i+n1) + work( i+n1 ) = work( i+n1 ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k, n - 1 + s = zero + do i = 0, j - k - 1 + aa = abs( a( i+j*lda ) ) + ! a(i,j-k) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-k + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(j-k,j-k) + s = s + aa + work( j-k ) = work( j-k ) + s + i = i + 1 + s = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(j,j) + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu=1 + k = k + 1 + ! k=(n+1)/2 for n odd and ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 2 + ! process + s = zero + do i = 0, j - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! i=j so process of a(j,j) + s = s + aa + work( j ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=dp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k-1 is special :process col a(k-1,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k, n - 1 + ! process col j of a = a(j,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + else + ! n is even + if( ilu==0 ) then + ! uplo = 'u' + do i = k, n - 1 + work( i ) = zero + end do + do j = 0, k - 1 + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j,i+k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = s + end do + ! j=k + aa = abs( real( a( 0+j*lda ),KIND=dp) ) + ! a(k,k) + s = aa + do i = 1, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(k,k+i) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + do j = k + 1, n - 1 + s = zero + do i = 0, j - 2 - k + aa = abs( a( i+j*lda ) ) + ! a(i,j-k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=j-1-k + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(j-k-1,j-k-1) + s = s + aa + work( j-k-1 ) = work( j-k-1 ) + s + i = i + 1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(j,j) + s = aa + do l = j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(j,l) + work( l ) = work( l ) + aa + s = s + aa + end do + work( j ) = work( j ) + s + end do + ! j=n + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(i,k-1) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = work( i ) + s + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + else + ! ilu=1 + do i = k, n - 1 + work( i ) = zero + end do + ! j=0 is special :process col a(k:n-1,k) + s = abs( real( a( 0 ),KIND=dp) ) + ! a(k,k) + do i = 1, k - 1 + aa = abs( a( i ) ) + ! a(k+i,k) + work( i+k ) = work( i+k ) + aa + s = s + aa + end do + work( k ) = work( k ) + s + do j = 1, k - 1 + ! process + s = zero + do i = 0, j - 2 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! i=j-1 so process of a(j-1,j-1) + s = s + aa + work( j-1 ) = s + ! is initialised here + i = i + 1 + ! i=j process a(j+k,j+k) + aa = abs( real( a( i+j*lda ),KIND=dp) ) + s = aa + do l = k + j + 1, n - 1 + i = i + 1 + aa = abs( a( i+j*lda ) ) + ! a(l,k+j) + s = s + aa + work( l ) = work( l ) + aa + end do + work( k+j ) = work( k+j ) + s + end do + ! j=k is special :process col a(k,0:k-1) + s = zero + do i = 0, k - 2 + aa = abs( a( i+j*lda ) ) + ! a(k,i) + work( i ) = work( i ) + aa + s = s + aa + end do + ! i=k-1 + aa = abs( real( a( i+j*lda ),KIND=dp) ) + ! a(k-1,k-1) + s = s + aa + work( i ) = s + ! done with col j=k+1 + do j = k + 1, n + ! process col j-1 of a = a(j-1,0:k-1) + s = zero + do i = 0, k - 1 + aa = abs( a( i+j*lda ) ) + ! a(j-1,i) + work( i ) = work( i ) + aa + s = s + aa + end do + work( j-1 ) = work( j-1 ) + s + end do + value = work( 0 ) + do i = 1, n-1 + temp = work( i ) + if( value < temp .or. stdlib_disnan( temp ) )value = temp + end do + end if + end if + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + k = ( n+1 ) / 2 + scale = zero + s = one + if( noe==1 ) then + ! n is odd + if( ifm==1 ) then + ! a is normal + if( ilu==0 ) then + ! a is upper + do j = 0, k - 3 + call stdlib_zlassq( k-j-2, a( k+j+1+j*lda ), 1, scale, s ) + ! l at a(k,0) + end do + do j = 0, k - 1 + call stdlib_zlassq( k+j-1, a( 0+j*lda ), 1, scale, s ) + ! trap u at a(0,0) + end do + s = s + s + ! double s for the off diagonal elements + l = k - 1 + ! -> u(k,k) at a(k-1,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=dp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,1) + do i = 1, k - 1 + aa = real( a( l ),KIND=dp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(0,k-1) + aa = real( a( l ),KIND=dp) + ! u(k-1,k-1) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k) + do j = k, n - 1 + aa = real( a( l ),KIND=dp) + ! -> u(j-k,j-k) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale l(0,0) at a(0,0) + do i = 0, k - 2 + aa = real( a( l ),KIND=dp) + ! l(i,i) + if( aa/=zero ) then + if( scale k-1 + (k-1)*lda or l(k-1,k-1) at a(k-1,k-1) + aa = real( a( l ),KIND=dp) + ! l(k-1,k-1) at a(k-1,k-1) + if( aa/=zero ) then + if( scale u(k,k) at a(k,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=dp) + ! u(k+i,k+i) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + do i = 0, k - 1 + aa = real( a( l ),KIND=dp) + ! l(k-1+i,k-1+i) + if( aa/=zero ) then + if( scale u(k,k) at a(0,k) + aa = real( a( l ),KIND=dp) + ! u(k,k) + if( aa/=zero ) then + if( scale u(0,0) at a(0,k+1) + do j = k + 1, n - 1 + aa = real( a( l ),KIND=dp) + ! -> u(j-k-1,j-k-1) + if( aa/=zero ) then + if( scale u(j,j) + if( aa/=zero ) then + if( scale u(k-1,k-1) at a(k-1,n) + aa = real( a( l ),KIND=dp) + ! u(k,k) + if( aa/=zero ) then + if( scale l(k,k) at a(0,0) + aa = real( a( l ),KIND=dp) + ! l(k,k) at a(0,0) + if( aa/=zero ) then + if( scale l(0,0) at a(0,1) + do i = 0, k - 2 + aa = real( a( l ),KIND=dp) + ! l(i,i) + if( aa/=zero ) then + if( scale k - 1 + k*lda or l(k-1,k-1) at a(k-1,k) + aa = real( a( l ),KIND=dp) + ! l(k-1,k-1) at a(k-1,k) + if( aa/=zero ) then + if( scale ZLANHP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex hermitian matrix A, supplied in packed form. + + real(dp) function stdlib_zlanhp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 0 + do j = 1, n + do i = k + 1, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + sum = abs( real( ap( k ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + k = 1 + do j = 1, n + sum = abs( real( ap( k ),KIND=dp) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is hermitian). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( real( ap( k ),KIND=dp) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( real( ap( k ),KIND=dp) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_zlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=dp)/=zero ) then + absa = abs( real( ap( k ),KIND=dp) ) + if( scale ZLANHS: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> Hessenberg matrix A. + + real(dp) function stdlib_zlanhs( norm, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + do j = 1, n + do i = 1, min( n, j+1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + do j = 1, n + sum = zero + do i = 1, min( n, j+1 ) + sum = sum + abs( a( i, j ) ) + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, min( n, j+1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + do j = 1, n + call stdlib_zlassq( min( n, j+1 ), a( 1, j ), 1, scale, sum ) + end do + value = scale*sqrt( sum ) + end if + stdlib_zlanhs = value + return + end function stdlib_zlanhs + + !> ZLANHT: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex Hermitian tridiagonal matrix A. + + pure real(dp) function stdlib_zlanht( norm, n, d, e ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(in) :: d(*) + complex(dp), intent(in) :: e(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i + real(dp) :: anorm, scale, sum + ! Intrinsic Functions + intrinsic :: abs,max,sqrt + ! Executable Statements + if( n<=0 ) then + anorm = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + anorm = abs( d( n ) ) + do i = 1, n - 1 + sum = abs( d( i ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + sum = abs( e( i ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + end do + else if( stdlib_lsame( norm, 'O' ) .or. norm=='1' .or.stdlib_lsame( norm, 'I' ) ) & + then + ! find norm1(a). + if( n==1 ) then + anorm = abs( d( 1 ) ) + else + anorm = abs( d( 1 ) )+abs( e( 1 ) ) + sum = abs( e( n-1 ) )+abs( d( n ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + do i = 2, n - 1 + sum = abs( d( i ) )+abs( e( i ) )+abs( e( i-1 ) ) + if( anorm < sum .or. stdlib_disnan( sum ) ) anorm = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( n>1 ) then + call stdlib_zlassq( n-1, e, 1, scale, sum ) + sum = 2*sum + end if + call stdlib_dlassq( n, d, 1, scale, sum ) + anorm = scale*sqrt( sum ) + end if + stdlib_zlanht = anorm + return + end function stdlib_zlanht + + !> ZLANSB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n symmetric band matrix A, with k super-diagonals. + + real(dp) function stdlib_zlansb( norm, uplo, n, k, ab, ldab,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, l + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( ab( k+1, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ab( 1, j ) ) + l = 1 - j + do i = j + 1, min( n, j+k ) + absa = abs( ab( l+i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( k>0 ) then + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( min( j-1, k ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + l = k + 1 + else + do j = 1, n - 1 + call stdlib_zlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + l = 1 + end if + sum = 2*sum + else + l = 1 + end if + call stdlib_zlassq( n, ab( l, 1 ), ldab, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_zlansb = value + return + end function stdlib_zlansb + + !> ZLANSP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A, supplied in packed form. + + real(dp) function stdlib_zlansp( norm, uplo, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j, k + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,real,aimag,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + k = 1 + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + k = 1 + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + work( j ) = sum + abs( ap( k ) ) + k = k + 1 + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( ap( k ) ) + k = k + 1 + do i = j + 1, n + absa = abs( ap( k ) ) + sum = sum + absa + work( i ) = work( i ) + absa + k = k + 1 + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + k = 2 + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + do j = 1, n - 1 + call stdlib_zlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + sum = 2*sum + k = 1 + do i = 1, n + if( real( ap( k ),KIND=dp)/=zero ) then + absa = abs( real( ap( k ),KIND=dp) ) + if( scale ZLANSY: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> complex symmetric matrix A. + + real(dp) function stdlib_zlansy( norm, uplo, n, a, lda, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm, uplo + integer(ilp), intent(in) :: lda, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, j + real(dp) :: absa, scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, j + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, n + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else if( ( stdlib_lsame( norm, 'I' ) ) .or. ( stdlib_lsame( norm, 'O' ) ) .or.( & + norm=='1' ) ) then + ! find normi(a) ( = norm1(a), since a is symmetric). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + sum = zero + do i = 1, j - 1 + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + work( j ) = sum + abs( a( j, j ) ) + end do + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + sum = work( j ) + abs( a( j, j ) ) + do i = j + 1, n + absa = abs( a( i, j ) ) + sum = sum + absa + work( i ) = work( i ) + absa + end do + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + scale = zero + sum = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 2, n + call stdlib_zlassq( j-1, a( 1, j ), 1, scale, sum ) + end do + else + do j = 1, n - 1 + call stdlib_zlassq( n-j, a( j+1, j ), 1, scale, sum ) + end do + end if + sum = 2*sum + call stdlib_zlassq( n, a, lda+1, scale, sum ) + value = scale*sqrt( sum ) + end if + stdlib_zlansy = value + return + end function stdlib_zlansy + + !> ZLANTB: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of an + !> n by n triangular band matrix A, with ( k + 1 ) diagonals. + + real(dp) function stdlib_zlantb( norm, uplo, diag, n, k, ab,ldab, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: k, ldab, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ab(ldab,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, l + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,max,min,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 2, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = max( k+2-j, 1 ), k + 1 + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = 1, min( n+1-j, k+1 ) + sum = abs( ab( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = max( k+2-j, 1 ), k + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = max( k+2-j, 1 ), k + 1 + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = 2, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + else + sum = zero + do i = 1, min( n+1-j, k+1 ) + sum = sum + abs( ab( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j - 1 + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = k + 1 - j + do i = max( 1, j-k ), j + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + l = 1 - j + do i = j + 1, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + l = 1 - j + do i = j, min( n, j+k ) + work( i ) = work( i ) + abs( ab( l+i, j ) ) + end do + end do + end if + end if + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 2, n + call stdlib_zlassq( min( j-1, k ),ab( max( k+2-j, 1 ), j ), 1, scale,& + sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_zlassq( min( j, k+1 ), ab( max( k+2-j, 1 ), j ),1, scale, sum ) + + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + if( k>0 ) then + do j = 1, n - 1 + call stdlib_zlassq( min( n-j, k ), ab( 2, j ), 1, scale,sum ) + end do + end if + else + scale = zero + sum = one + do j = 1, n + call stdlib_zlassq( min( n-j+1, k+1 ), ab( 1, j ), 1, scale,sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_zlantb = value + return + end function stdlib_zlantb + + !> ZLANTP: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> triangular matrix A, supplied in packed form. + + real(dp) function stdlib_zlantp( norm, uplo, diag, n, ap, work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j, k + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,sqrt + ! Executable Statements + if( n==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + k = 1 + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 2 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k + 1, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = k, k + j - 1 + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + j + end do + else + do j = 1, n + do i = k, k + n - j + sum = abs( ap( i ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + k = k + n - j + 1 + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + k = 1 + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( udiag ) then + sum = one + do i = k, k + j - 2 + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + j - 1 + sum = sum + abs( ap( i ) ) + end do + end if + k = k + j + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = k + 1, k + n - j + sum = sum + abs( ap( i ) ) + end do + else + sum = zero + do i = k, k + n - j + sum = sum + abs( ap( i ) ) + end do + end if + k = k + n - j + 1 + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + k = 1 + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + do i = 1, j - 1 + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + k = k + 1 + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = 1, j + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, n + work( i ) = one + end do + do j = 1, n + k = k + 1 + do i = j + 1, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + else + do i = 1, n + work( i ) = zero + end do + do j = 1, n + do i = j, n + work( i ) = work( i ) + abs( ap( k ) ) + k = k + 1 + end do + end do + end if + end if + value = zero + do i = 1, n + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 2, n + call stdlib_zlassq( j-1, ap( k ), 1, scale, sum ) + k = k + j + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_zlassq( j, ap( k ), 1, scale, sum ) + k = k + j + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = n + k = 2 + do j = 1, n - 1 + call stdlib_zlassq( n-j, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + else + scale = zero + sum = one + k = 1 + do j = 1, n + call stdlib_zlassq( n-j+1, ap( k ), 1, scale, sum ) + k = k + n - j + 1 + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_zlantp = value + return + end function stdlib_zlantp + + !> ZLANTR: returns the value of the one norm, or the Frobenius norm, or + !> the infinity norm, or the element of largest absolute value of a + !> trapezoidal or triangular matrix A. + + real(dp) function stdlib_zlantr( norm, uplo, diag, m, n, a, lda,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + real(dp), intent(out) :: work(*) + complex(dp), intent(in) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: udiag + integer(ilp) :: i, j + real(dp) :: scale, sum, value + ! Intrinsic Functions + intrinsic :: abs,min,sqrt + ! Executable Statements + if( min( m, n )==0 ) then + value = zero + else if( stdlib_lsame( norm, 'M' ) ) then + ! find max(abs(a(i,j))). + if( stdlib_lsame( diag, 'U' ) ) then + value = one + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j-1 ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j + 1, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + else + value = zero + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + do i = 1, min( m, j ) + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + else + do j = 1, n + do i = j, m + sum = abs( a( i, j ) ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end do + end if + end if + else if( ( stdlib_lsame( norm, 'O' ) ) .or. ( norm=='1' ) ) then + ! find norm1(a). + value = zero + udiag = stdlib_lsame( diag, 'U' ) + if( stdlib_lsame( uplo, 'U' ) ) then + do j = 1, n + if( ( udiag ) .and. ( j<=m ) ) then + sum = one + do i = 1, j - 1 + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = 1, min( m, j ) + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else + do j = 1, n + if( udiag ) then + sum = one + do i = j + 1, m + sum = sum + abs( a( i, j ) ) + end do + else + sum = zero + do i = j, m + sum = sum + abs( a( i, j ) ) + end do + end if + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + end if + else if( stdlib_lsame( norm, 'I' ) ) then + ! find normi(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, m + work( i ) = one + end do + do j = 1, n + do i = 1, min( m, j-1 ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = 1, min( m, j ) + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + do i = 1, min( m, n ) + work( i ) = one + end do + do i = n + 1, m + work( i ) = zero + end do + do j = 1, n + do i = j + 1, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + else + do i = 1, m + work( i ) = zero + end do + do j = 1, n + do i = j, m + work( i ) = work( i ) + abs( a( i, j ) ) + end do + end do + end if + end if + value = zero + do i = 1, m + sum = work( i ) + if( value < sum .or. stdlib_disnan( sum ) ) value = sum + end do + else if( ( stdlib_lsame( norm, 'F' ) ) .or. ( stdlib_lsame( norm, 'E' ) ) ) & + then + ! find normf(a). + if( stdlib_lsame( uplo, 'U' ) ) then + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 2, n + call stdlib_zlassq( min( m, j-1 ), a( 1, j ), 1, scale, sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_zlassq( min( m, j ), a( 1, j ), 1, scale, sum ) + end do + end if + else + if( stdlib_lsame( diag, 'U' ) ) then + scale = one + sum = min( m, n ) + do j = 1, n + call stdlib_zlassq( m-j, a( min( m, j+1 ), j ), 1, scale,sum ) + end do + else + scale = zero + sum = one + do j = 1, n + call stdlib_zlassq( m-j+1, a( j, j ), 1, scale, sum ) + end do + end if + end if + value = scale*sqrt( sum ) + end if + stdlib_zlantr = value + return + end function stdlib_zlantr + + !> Given two column vectors X and Y, let + !> A = ( X Y ). + !> The subroutine first computes the QR factorization of A = Q*R, + !> and then computes the SVD of the 2-by-2 upper triangular matrix R. + !> The smaller singular value of R is returned in SSMIN, which is used + !> as the measurement of the linear dependency of the vectors X and Y. + + pure subroutine stdlib_zlapll( n, x, incx, y, incy, ssmin ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx, incy, n + real(dp), intent(out) :: ssmin + ! Array Arguments + complex(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + real(dp) :: ssmax + complex(dp) :: a11, a12, a22, c, tau + ! Intrinsic Functions + intrinsic :: abs,conjg + ! Executable Statements + ! quick return if possible + if( n<=1 ) then + ssmin = zero + return + end if + ! compute the qr factorization of the n-by-2 matrix ( x y ) + call stdlib_zlarfg( n, x( 1 ), x( 1+incx ), incx, tau ) + a11 = x( 1 ) + x( 1 ) = cone + c = -conjg( tau )*stdlib_zdotc( n, x, incx, y, incy ) + call stdlib_zaxpy( n, c, x, incx, y, incy ) + call stdlib_zlarfg( n-1, y( 1+incy ), y( 1+2*incy ), incy, tau ) + a12 = y( 1 ) + a22 = y( 1+incy ) + ! compute the svd of 2-by-2 upper triangular matrix. + call stdlib_dlas2( abs( a11 ), abs( a12 ), abs( a22 ), ssmin, ssmax ) + return + end subroutine stdlib_zlapll + + !> ZLAQP2: computes a QR factorization with column pivoting of + !> the block A(OFFSET+1:M,1:N). + !> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_zlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: lda, m, n, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: vn1(*), vn2(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: i, itemp, j, mn, offpi, pvt + real(dp) :: temp, temp2, tol3z + complex(dp) :: aii + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,sqrt + ! Executable Statements + mn = min( m-offset, n ) + tol3z = sqrt(stdlib_dlamch('EPSILON')) + ! compute factorization. + loop_20: do i = 1, mn + offpi = offset + i + ! determine ith pivot column and swap if necessary. + pvt = ( i-1 ) + stdlib_idamax( n-i+1, vn1( i ), 1 ) + if( pvt/=i ) then + call stdlib_zswap( m, a( 1, pvt ), 1, a( 1, i ), 1 ) + itemp = jpvt( pvt ) + jpvt( pvt ) = jpvt( i ) + jpvt( i ) = itemp + vn1( pvt ) = vn1( i ) + vn2( pvt ) = vn2( i ) + end if + ! generate elementary reflector h(i). + if( offpi ZLAQPS: computes a step of QR factorization with column pivoting + !> of a complex M-by-N matrix A by using Blas-3. It tries to factorize + !> NB columns from A starting from the row OFFSET+1, and updates all + !> of the matrix with Blas-3 xGEMM. + !> In some cases, due to catastrophic cancellations, it cannot + !> factorize NB columns. Hence, the actual number of factorized + !> columns is returned in KB. + !> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. + + pure subroutine stdlib_zlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, & + ldf ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: kb + integer(ilp), intent(in) :: lda, ldf, m, n, nb, offset + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(inout) :: vn1(*), vn2(*) + complex(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*) + complex(dp), intent(out) :: tau(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: itemp, j, k, lastrk, lsticc, pvt, rk + real(dp) :: temp, temp2, tol3z + complex(dp) :: akk + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min,nint,sqrt + ! Executable Statements + lastrk = min( m, n+offset ) + lsticc = 0 + k = 0 + tol3z = sqrt(stdlib_dlamch('EPSILON')) + ! beginning of while loop. + 10 continue + if( ( k1 ) then + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + call stdlib_zgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1 ),lda, f( k, 1 ),& + ldf, cone, a( rk, k ), 1 ) + do j = 1, k - 1 + f( k, j ) = conjg( f( k, j ) ) + end do + end if + ! generate elementary reflector h(k). + if( rk1 ) then + call stdlib_zgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1 ), lda,& + a( rk, k ), 1, czero,auxv( 1 ), 1 ) + call stdlib_zgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1, 1 ), ldf,auxv( 1 ), 1, & + cone, f( 1, k ), 1 ) + end if + ! update the current row of a: + ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h. + if( k0 ) then + itemp = nint( vn2( lsticc ),KIND=ilp) + vn1( lsticc ) = stdlib_dznrm2( m-rk, a( rk+1, lsticc ), 1 ) + ! note: the computation of vn1( lsticc ) relies on the fact that + ! stdlib_snrm2 does not fail on vectors with norm below the value of + ! sqrt(stdlib_dlamch('s')) + vn2( lsticc ) = vn1( lsticc ) + lsticc = itemp + go to 60 + end if + return + end subroutine stdlib_zlaqps + + !> ZLAQR5:, called by ZLAQR0, performs a + !> single small-bulge multi-shift QR sweep. + + pure subroutine stdlib_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & + ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & + ldz, n, nh, nshfts, nv + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), s(*), z(ldz,*) + complex(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(dp), parameter :: rzero = 0.0_dp + real(dp), parameter :: rone = 1.0_dp + + + ! Local Scalars + complex(dp) :: alpha, beta, cdum, refsum + real(dp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp + integer(ilp) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& + m, m22, mbot, mtop, nbmps, ndcol, ns, nu + logical(lk) :: accum, bmp22 + ! Intrinsic Functions + intrinsic :: abs,real,conjg,aimag,max,min,mod + ! Local Arrays + complex(dp) :: vt(3) + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== if there are no shifts, then there is nothing to do. ==== + if( nshfts<2 )return + ! ==== if the active block is empty or 1-by-1, then there + ! . is nothing to do. ==== + if( ktop>=kbot )return + ! ==== nshfts is supposed to be even, but if it is odd, + ! . then simply reduce it by cone. ==== + ns = nshfts - mod( nshfts, 2 ) + ! ==== machine constants for deflation ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== use accumulated reflections to update far-from-diagonal + ! . entries ? ==== + accum = ( kacc22==1 ) .or. ( kacc22==2 ) + ! ==== clear trash ==== + if( ktop+2<=kbot )h( ktop+2, ktop ) = czero + ! ==== nbmps = number of 2-shift bulges in the chain ==== + nbmps = ns / 2 + ! ==== kdu = width of slab ==== + kdu = 4*nbmps + ! ==== create and chase chains of nbmps bulges ==== + loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps + ! jtop = index from which updates from the right start. + if( accum ) then + jtop = max( ktop, incol ) + else if( wantt ) then + jtop = 1 + else + jtop = ktop + end if + ndcol = incol + kdu + if( accum )call stdlib_zlaset( 'ALL', kdu, kdu, czero, cone, u, ldu ) + ! ==== near-the-diagonal bulge chase. the following loop + ! . performs the near-the-diagonal part of a small bulge + ! . multi-shift qr sweep. each 4*nbmps column diagonal + ! . chunk extends from column incol to column ndcol + ! . (including both column incol and column ndcol). the + ! . following loop chases a 2*nbmps+1 column long chain of + ! . nbmps bulges 2*nbmps columns to the right. (incol + ! . may be less than ktop and and ndcol may be greater than + ! . kbot indicating phantom columns from which to chase + ! . bulges before they are actually introduced or to which + ! . to chase bulges beyond column kbot.) ==== + loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) + ! ==== bulges number mtop to mbot are active double implicit + ! . shift bulges. there may or may not also be small + ! . 2-by-2 bulge, if there is room. the inactive bulges + ! . (if any) must wait until the active bulges have moved + ! . down the diagonal to make room. the phantom matrix + ! . paradigm described above helps keep track. ==== + mtop = max( 1, ( ktop-krcol ) / 2+1 ) + mbot = min( nbmps, ( kbot-krcol-1 ) / 2 ) + m22 = mbot + 1 + bmp22 = ( mbot=ktop ) then + if( h( k+1, k )/=czero ) then + tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) + if( tst1==rzero ) then + if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & + k+1, k ) = czero + end if + end if + end if + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + kms = k - incol + do j = max( 1, ktop-incol ), kdu + refsum = v( 1, m22 )*( u( j, kms+1 )+v( 2, m22 )*u( j, kms+2 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + else if( wantz ) then + do j = iloz, ihiz + refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*z( j, k+2 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m22 ) ) + end do + end if + end if + ! ==== normal case: chain of 3-by-3 reflections ==== + loop_80: do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + if( k==ktop-1 ) then + call stdlib_zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),s( 2*m ), v( 1, m )& + ) + alpha = v( 1, m ) + call stdlib_zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) ) + else + ! ==== perform delayed transformation of row below + ! . mth bulge. exploit fact that first two elements + ! . of row are actually czero. ==== + refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 ) + h( k+3, k ) = -refsum + h( k+3, k+1 ) = -refsum*conjg( v( 2, m ) ) + h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3, m ) ) + ! ==== calculate reflection to move + ! . mth bulge cone step. ==== + beta = h( k+1, k ) + v( 2, m ) = h( k+2, k ) + v( 3, m ) = h( k+3, k ) + call stdlib_zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) ) + ! ==== a bulge may collapse because of vigilant + ! . deflation or destructive underflow. in the + ! . underflow case, try the two-small-subdiagonals + ! . trick to try to reinflate the bulge. ==== + if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & + ) then + ! ==== typical case: not collapsed (yet). ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== atypical case: collapsed. attempt to + ! . reintroduce ignoring h(k+1,k) and h(k+2,k). + ! . if the fill resulting from the new + ! . reflector is too large, then abandon it. + ! . otherwise, use the new cone. ==== + call stdlib_zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),s( 2*m ), vt ) + + alpha = vt( 1 ) + call stdlib_zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) ) + refsum = conjg( vt( 1 ) )*( h( k+1, k )+conjg( vt( 2 ) )*h( k+2, k ) ) + + if( cabs1( h( k+2, k )-refsum*vt( 2 ) )+cabs1( refsum*vt( 3 ) )>ulp*( & + cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & + then + ! ==== starting a new bulge here would + ! . create non-negligible fill. use + ! . the old cone with trepidation. ==== + h( k+1, k ) = beta + h( k+2, k ) = czero + h( k+3, k ) = czero + else + ! ==== starting a new bulge here would + ! . create only negligible fill. + ! . replace the old reflector with + ! . the new cone. ==== + h( k+1, k ) = h( k+1, k ) - refsum + h( k+2, k ) = czero + h( k+3, k ) = czero + v( 1, m ) = vt( 1 ) + v( 2, m ) = vt( 2 ) + v( 3, m ) = vt( 3 ) + end if + end if + end if + ! ==== apply reflection from the right and + ! . the first column of update from the left. + ! . these updates are required for the vigilant + ! . deflation check. we still delay most of the + ! . updates from the left for efficiency. ==== + do j = jtop, min( kbot, k+3 ) + refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*h( j, k+2 )+v( 3, m )*h( j, k+3 & + ) ) + h( j, k+1 ) = h( j, k+1 ) - refsum + h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2, m ) ) + h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + ! ==== perform update from left for subsequent + ! . column. ==== + refsum = conjg( v( 1, m ) )*( h( k+1, k+1 )+conjg( v( 2, m ) )*h( k+2, k+1 )+& + conjg( v( 3, m ) )*h( k+3, k+1 ) ) + h( k+1, k+1 ) = h( k+1, k+1 ) - refsum + h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m ) + h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m ) + ! ==== the following convergence test requires that + ! . the tradition small-compared-to-nearby-diagonals + ! . criterion and the ahues + ! . criteria both be satisfied. the latter improves + ! . accuracy in some examples. falling back on an + ! . alternate convergence criterion when tst1 or tst2 + ! . is czero (as done here) is traditional but probably + ! . unnecessary. ==== + if( k=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) + if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) + if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) + if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) + if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) + if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) + end if + if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then + h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) + h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) + + scl = h11 + h12 + tst2 = h22*( h11 / scl ) + if( tst2==rzero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& + k ) = czero + end if + end if + end do loop_80 + ! ==== multiply h by reflections from the left ==== + if( accum ) then + jbot = min( ndcol, kbot ) + else if( wantt ) then + jbot = n + else + jbot = kbot + end if + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = max( ktop, krcol + 2*m ), jbot + refsum = conjg( v( 1, m ) )*( h( k+1, j )+conjg( v( 2, m ) )*h( k+2, j )+& + conjg( v( 3, m ) )*h( k+3, j ) ) + h( k+1, j ) = h( k+1, j ) - refsum + h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m ) + h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m ) + end do + end do + ! ==== accumulate orthogonal transformations. ==== + if( accum ) then + ! ==== accumulate u. (if needed, update z later + ! . with an efficient matrix-matrix + ! . multiply.) ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + kms = k - incol + i2 = max( 1, ktop-incol ) + i2 = max( i2, kms-(krcol-incol)+1 ) + i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 ) + do j = i2, i4 + refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*u( j, kms+2 )+v( 3, m )*u( & + j, kms+3 ) ) + u( j, kms+1 ) = u( j, kms+1 ) - refsum + u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2, m ) ) + u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + else if( wantz ) then + ! ==== u is not accumulated, so update z + ! . now by multiplying by reflections + ! . from the right. ==== + do m = mbot, mtop, -1 + k = krcol + 2*( m-1 ) + do j = iloz, ihiz + refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*z( j, k+2 )+v( 3, m )*z( j, & + k+3 ) ) + z( j, k+1 ) = z( j, k+1 ) - refsum + z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2, m ) ) + z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3, m ) ) + end do + end do + end if + ! ==== end of near-the-diagonal bulge chase. ==== + end do loop_145 + ! ==== use u (if accumulated) to update far-from-diagonal + ! . entries in h. if required, use u to update z as + ! . well. ==== + if( accum ) then + if( wantt ) then + jtop = 1 + jbot = n + else + jtop = ktop + jbot = kbot + end if + k1 = max( 1, ktop-incol ) + nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1 + ! ==== horizontal multiply ==== + do jcol = min( ndcol, kbot ) + 1, jbot, nh + jlen = min( nh, jbot-jcol+1 ) + call stdlib_zgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& + jcol ), ldh, czero, wh,ldwh ) + call stdlib_zlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) + + end do + ! ==== vertical multiply ==== + do jrow = jtop, max( ktop, incol ) - 1, nv + jlen = min( nv, max( ktop, incol )-jrow ) + call stdlib_zgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & + k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_zlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) + + end do + ! ==== z multiply (also vertical) ==== + if( wantz ) then + do jrow = iloz, ihiz, nv + jlen = min( nv, ihiz-jrow+1 ) + call stdlib_zgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & + u( k1, k1 ),ldu, czero, wv, ldwv ) + call stdlib_zlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) + + end do + end if + end if + end do loop_180 + end subroutine stdlib_zlaqr5 + + !> ZLAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position + + pure subroutine stdlib_zlaqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & + q, ldq, nz, zstart, z, ldz ) + ! arguments + logical(lk), intent( in ) :: ilq, ilz + integer(ilp), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & + zstart, ihi + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + + + ! local variables + real(dp) :: c + complex(dp) :: s, temp + if( k+1 == ihi ) then + ! shift is located on the edge of the matrix, remove it + call stdlib_zlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) + b( ihi, ihi ) = temp + b( ihi, ihi-1 ) = czero + call stdlib_zrot( ihi-istartm, b( istartm, ihi ), 1, b( istartm,ihi-1 ), 1, c, s ) + + call stdlib_zrot( ihi-istartm+1, a( istartm, ihi ), 1, a( istartm,ihi-1 ), 1, c, s ) + + if ( ilz ) then + call stdlib_zrot( nz, z( 1, ihi-zstart+1 ), 1, z( 1, ihi-1-zstart+1 ), 1, c, s ) + + end if + else + ! normal operation, move bulge down + ! apply transformation from the right + call stdlib_zlartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) + b( k+1, k+1 ) = temp + b( k+1, k ) = czero + call stdlib_zrot( k+2-istartm+1, a( istartm, k+1 ), 1, a( istartm,k ), 1, c, s ) + + call stdlib_zrot( k-istartm+1, b( istartm, k+1 ), 1, b( istartm, k ),1, c, s ) + + if ( ilz ) then + call stdlib_zrot( nz, z( 1, k+1-zstart+1 ), 1, z( 1, k-zstart+1 ),1, c, s ) + + end if + ! apply transformation from the left + call stdlib_zlartg( a( k+1, k ), a( k+2, k ), c, s, temp ) + a( k+1, k ) = temp + a( k+2, k ) = czero + call stdlib_zrot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) + call stdlib_zrot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) + if ( ilq ) then + call stdlib_zrot( nq, q( 1, k+1-qstart+1 ), 1, q( 1, k+2-qstart+1 ), 1, c, conjg(& + s ) ) + end if + end if + end subroutine stdlib_zlaqz1 + + !> ZLAQZ3: Executes a single multishift QZ sweep + + pure subroutine stdlib_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,& + beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) + ! function arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & + nblock_desired, ldqc, ldzc + complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( & + ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) + integer(ilp), intent( out ) :: info + + + ! local scalars + integer(ilp) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & + ishift, nblock, npos + real(dp) :: safmin, safmax, c, scale + complex(dp) :: temp, temp2, temp3, s + info = 0 + if ( nblock_desired < nshifts+1 ) then + info = -8 + end if + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = n*nblock_desired + return + else if ( lwork < n*nblock_desired ) then + info = -25 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ3', -info ) + return + end if + ! executable statements + ! get machine constants + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_dlabad( safmin, safmax ) + if ( ilo >= ihi ) then + return + end if + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + ns = nshifts + npos = max( nblock_desired-ns, 1 ) + ! the following block introduces the shifts and chases + ! them down one by one just enough to make space for + ! the other shifts. the near-the-diagonal block is + ! of size (ns+1) x ns. + call stdlib_zlaset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) + call stdlib_zlaset( 'FULL', ns, ns, czero, cone, zc, ldzc ) + do i = 1, ns + ! introduce the shift + scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) + if( scale >= safmin .and. scale <= safmax ) then + alpha( i ) = alpha( i )/scale + beta( i ) = beta( i )/scale + end if + temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) + temp3 = beta( i )*a( ilo+1, ilo ) + if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then + temp2 = cone + temp3 = czero + end if + call stdlib_zlartg( temp2, temp3, c, s, temp ) + call stdlib_zrot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) + call stdlib_zrot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) + call stdlib_zrot( ns+1, qc( 1, 1 ), 1, qc( 1, 2 ), 1, c,conjg( s ) ) + ! chase the shift down + do j = 1, ns-i + call stdlib_zlaqz1( .true., .true., j, 1, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & + ilo, ilo ), ldb, ns+1, 1, qc,ldqc, ns, 1, zc, ldzc ) + end do + end do + ! update the rest of the pencil + ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) + ! from the left with qc(1:ns+1,1:ns+1)' + sheight = ns+1 + swidth = istopm-( ilo+ns )+1 + if ( swidth > 0 ) then + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& + ns ), lda, czero, work, sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) + + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& + ns ), ldb, czero, work, sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) + + end if + if ( ilq ) then + call stdlib_zgemm( 'N', 'N', n, sheight, sheight, cone, q( 1, ilo ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_zlacpy( 'ALL', n, sheight, work, n, q( 1, ilo ), ldq ) + end if + ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) + ! from the right with zc(1:ns,1:ns) + sheight = ilo-1-istartm+1 + swidth = ns + if ( sheight > 0 ) then + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) + + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) + + end if + if ( ilz ) then + call stdlib_zgemm( 'N', 'N', n, swidth, swidth, cone, z( 1, ilo ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_zlacpy( 'ALL', n, swidth, work, n, z( 1, ilo ), ldz ) + end if + ! the following block chases the shifts down to the bottom + ! right block. if possible, a shift is moved down npos + ! positions at a time + k = ilo + do while ( k < ihi-ns ) + np = min( ihi-ns-k, npos ) + ! size of the near-the-diagonal block + nblock = ns+np + ! istartb points to the first row we will be updating + istartb = k+1 + ! istopb points to the last column we will be updating + istopb = k+nblock-1 + call stdlib_zlaset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) + call stdlib_zlaset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) + ! near the diagonal shift chase + do i = ns-1, 0, -1 + do j = 0, np-1 + ! move down the block with index k+i+j, updating + ! the (ns+np x ns+np) block: + ! (k:k+ns+np,k:k+ns+np-1) + call stdlib_zlaqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & + ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(k+1:k+ns+np, k+ns+np:istopm) and + ! b(k+1:k+ns+np, k+ns+np:istopm) + ! from the left with qc(1:ns+np,1:ns+np)' + sheight = ns+np + swidth = istopm-( k+ns+np )+1 + if ( swidth > 0 ) then + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& + ns+np ), lda, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & + ) + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& + ns+np ), ldb, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & + ) + end if + if ( ilq ) then + call stdlib_zgemm( 'N', 'N', n, nblock, nblock, cone, q( 1, k+1 ),ldq, qc, ldqc, & + czero, work, n ) + call stdlib_zlacpy( 'ALL', n, nblock, work, n, q( 1, k+1 ), ldq ) + end if + ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) + ! from the right with zc(1:ns+np,1:ns+np) + sheight = k-istartm+1 + swidth = nblock + if ( sheight > 0 ) then + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & + zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) + + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & + zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) + + end if + if ( ilz ) then + call stdlib_zgemm( 'N', 'N', n, nblock, nblock, cone, z( 1, k ),ldz, zc, ldzc, & + czero, work, n ) + call stdlib_zlacpy( 'ALL', n, nblock, work, n, z( 1, k ), ldz ) + end if + k = k+np + end do + ! the following block removes the shifts from the bottom right corner + ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). + call stdlib_zlaset( 'FULL', ns, ns, czero, cone, qc, ldqc ) + call stdlib_zlaset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) + ! istartb points to the first row we will be updating + istartb = ihi-ns+1 + ! istopb points to the last column we will be updating + istopb = ihi + do i = 1, ns + ! chase the shift down to the bottom right corner + do ishift = ihi-i, ihi-1 + call stdlib_zlaqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & + ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) + end do + end do + ! update rest of the pencil + ! update a(ihi-ns+1:ihi, ihi+1:istopm) + ! from the left with qc(1:ns,1:ns)' + sheight = ns + swidth = istopm-( ihi+1 )+1 + if ( swidth > 0 ) then + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & + ihi+1 ), lda, czero, work, sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & + ) + call stdlib_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & + ihi+1 ), ldb, czero, work, sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & + ) + end if + if ( ilq ) then + call stdlib_zgemm( 'N', 'N', n, ns, ns, cone, q( 1, ihi-ns+1 ), ldq,qc, ldqc, czero,& + work, n ) + call stdlib_zlacpy( 'ALL', n, ns, work, n, q( 1, ihi-ns+1 ), ldq ) + end if + ! update a(istartm:ihi-ns,ihi-ns:ihi) + ! from the right with zc(1:ns+1,1:ns+1) + sheight = ihi-ns-istartm+1 + swidth = ns+1 + if ( sheight > 0 ) then + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & + lda, zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & + ) + call stdlib_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & + ldb, zc, ldzc, czero, work,sheight ) + call stdlib_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & + ) + end if + if ( ilz ) then + call stdlib_zgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1, ihi-ns ), ldz,zc, ldzc, & + czero, work, n ) + call stdlib_zlacpy( 'ALL', n, ns+1, work, n, z( 1, ihi-ns ), ldz ) + end if + end subroutine stdlib_zlaqz3 + + !> ZLARGV: generates a vector of complex plane rotations with real + !> cosines, determined by elements of the complex vectors x and y. + !> For i = 1,2,...,n + !> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) + !> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) + !> where c(i)**2 + ABS(s(i))**2 = 1 + !> The following conventions are used (these are the same as in ZLARTG, + !> but differ from the BLAS1 routine ZROTG): + !> If y(i)=0, then c(i)=1 and s(i)=0. + !> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. + + pure subroutine stdlib_zlargv( n, x, incx, y, incy, c, incc ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incc, incx, incy, n + ! Array Arguments + real(dp), intent(out) :: c(*) + complex(dp), intent(inout) :: x(*), y(*) + ! ===================================================================== + + + ! Local Scalars + ! logical first + integer(ilp) :: count, i, ic, ix, iy, j + real(dp) :: cs, d, di, dr, eps, f2, f2s, g2, g2s, safmin, safmn2, safmx2, scale + complex(dp) :: f, ff, fs, g, gs, r, sn + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,log,max,sqrt + ! Statement Functions + real(dp) :: abs1, abssq + ! Save Statement + ! save first, safmx2, safmin, safmn2 + ! Data Statements + ! data first / .true. / + ! Statement Function Definitions + abs1( ff ) = max( abs( real( ff,KIND=dp) ), abs( aimag( ff ) ) ) + abssq( ff ) = real( ff,KIND=dp)**2 + aimag( ff )**2 + ! Executable Statements + ! if( first ) then + ! first = .false. + safmin = stdlib_dlamch( 'S' ) + eps = stdlib_dlamch( 'E' ) + safmn2 = stdlib_dlamch( 'B' )**int( log( safmin / eps ) /log( stdlib_dlamch( 'B' ) )& + / two,KIND=ilp) + safmx2 = one / safmn2 + ! end if + ix = 1 + iy = 1 + ic = 1 + loop_60: do i = 1, n + f = x( ix ) + g = y( iy ) + ! use identical algorithm as in stdlib_zlartg + scale = max( abs1( f ), abs1( g ) ) + fs = f + gs = g + count = 0 + if( scale>=safmx2 ) then + 10 continue + count = count + 1 + fs = fs*safmn2 + gs = gs*safmn2 + scale = scale*safmn2 + if( scale>=safmx2 .and. count < 20 )go to 10 + else if( scale<=safmn2 ) then + if( g==czero ) then + cs = one + sn = czero + r = f + go to 50 + end if + 20 continue + count = count - 1 + fs = fs*safmx2 + gs = gs*safmx2 + scale = scale*safmx2 + if( scale<=safmn2 )go to 20 + end if + f2 = abssq( fs ) + g2 = abssq( gs ) + if( f2<=max( g2, one )*safmin ) then + ! this is a rare case: f is very small. + if( f==czero ) then + cs = zero + r = stdlib_dlapy2( real( g,KIND=dp), aimag( g ) ) + ! do complex/real division explicitly with two real + ! divisions + d = stdlib_dlapy2( real( gs,KIND=dp), aimag( gs ) ) + sn = cmplx( real( gs,KIND=dp) / d, -aimag( gs ) / d,KIND=dp) + go to 50 + end if + f2s = stdlib_dlapy2( real( fs,KIND=dp), aimag( fs ) ) + ! g2 and g2s are accurate + ! g2 is at least safmin, and g2s is at least safmn2 + g2s = sqrt( g2 ) + ! error in cs from underflow in f2s is at most + ! unfl / safmn2 .lt. sqrt(unfl*eps) .lt. eps + ! if max(g2,one)=g2, then f2 .lt. g2*safmin, + ! and so cs .lt. sqrt(safmin) + ! if max(g2,one)=one, then f2 .lt. safmin + ! and so cs .lt. sqrt(safmin)/safmn2 = sqrt(eps) + ! therefore, cs = f2s/g2s / sqrt( 1 + (f2s/g2s)**2 ) = f2s/g2s + cs = f2s / g2s + ! make sure abs(ff) = 1 + ! do complex/real division explicitly with 2 real divisions + if( abs1( f )>one ) then + d = stdlib_dlapy2( real( f,KIND=dp), aimag( f ) ) + ff = cmplx( real( f,KIND=dp) / d, aimag( f ) / d,KIND=dp) + else + dr = safmx2*real( f,KIND=dp) + di = safmx2*aimag( f ) + d = stdlib_dlapy2( dr, di ) + ff = cmplx( dr / d, di / d,KIND=dp) + end if + sn = ff*cmplx( real( gs,KIND=dp) / g2s, -aimag( gs ) / g2s,KIND=dp) + r = cs*f + sn*g + else + ! this is the most common case. + ! neither f2 nor f2/g2 are less than safmin + ! f2s cannot overflow, and it is accurate + f2s = sqrt( one+g2 / f2 ) + ! do the f2s(real)*fs(complex) multiply with two real + ! multiplies + r = cmplx( f2s*real( fs,KIND=dp), f2s*aimag( fs ),KIND=dp) + cs = one / f2s + d = f2 + g2 + ! do complex/real division explicitly with two real divisions + sn = cmplx( real( r,KIND=dp) / d, aimag( r ) / d,KIND=dp) + sn = sn*conjg( gs ) + if( count/=0 ) then + if( count>0 ) then + do j = 1, count + r = r*safmx2 + end do + else + do j = 1, -count + r = r*safmn2 + end do + end if + end if + end if + 50 continue + c( ic ) = cs + y( iy ) = sn + x( ix ) = r + ic = ic + incc + iy = iy + incy + ix = ix + incx + end do loop_60 + return + end subroutine stdlib_zlargv + + !> ZLARRV: computes the eigenvectors of the tridiagonal matrix + !> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. + !> The input eigenvalues should have been computed by DLARRE. + + pure subroutine stdlib_zlarrv( n, vl, vu, d, l, pivmin,isplit, m, dol, dou, minrgp,rtol1, & + rtol2, w, werr, wgap,iblock, indexw, gers, z, ldz, isuppz,work, iwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: dol, dou, ldz, m, n + integer(ilp), intent(out) :: info + real(dp), intent(in) :: minrgp, pivmin, vl, vu + real(dp), intent(inout) :: rtol1, rtol2 + ! Array Arguments + integer(ilp), intent(in) :: iblock(*), indexw(*), isplit(*) + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), l(*), w(*), werr(*), wgap(*) + real(dp), intent(in) :: gers(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxitr = 10 + + + + ! Local Scalars + logical(lk) :: eskip, needbs, stp2ii, tryrqc, usedbs, usedrq + integer(ilp) :: done, i, ibegin, idone, iend, ii, iindc1, iindc2, iindr, iindwk, iinfo,& + im, in, indeig, indld, indlld, indwrk, isupmn, isupmx, iter, itmp1, j, jblk, k, & + miniwsize, minwsize, nclus, ndepth, negcnt, newcls, newfst, newftt, newlst, newsiz, & + offset, oldcls, oldfst, oldien, oldlst, oldncl, p, parity, q, wbegin, wend, windex, & + windmn, windpl, zfrom, zto, zusedl, zusedu, zusedw + integer(ilp) :: indin1, indin2 + real(dp) :: bstres, bstw, eps, fudge, gap, gaptol, gl, gu, lambda, left, lgap, mingma, & + nrminv, resid, rgap, right, rqcorr, rqtol, savgap, sgndef, sigma, spdiam, ssigma, tau, & + tmp, tol, ztz + ! Intrinsic Functions + intrinsic :: abs,real,max,min + intrinsic :: cmplx + ! Executable Statements + info = 0 + ! quick return if possible + if( (n<=0).or.(m<=0) ) then + return + end if + ! the first n entries of work are reserved for the eigenvalues + indld = n+1 + indlld= 2*n+1 + indin1 = 3*n + 1 + indin2 = 4*n + 1 + indwrk = 5*n + 1 + minwsize = 12 * n + do i= 1,minwsize + work( i ) = zero + end do + ! iwork(iindr+1:iindr+n) hold the twist indices r for the + ! factorization used to compute the fp vector + iindr = 0 + ! iwork(iindc1+1:iinc2+n) are used to store the clusters of the current + ! layer and the one above. + iindc1 = n + iindc2 = 2*n + iindwk = 3*n + 1 + miniwsize = 7 * n + do i= 1,miniwsize + iwork( i ) = 0 + end do + zusedl = 1 + if(dol>1) then + ! set lower bound for use of z + zusedl = dol-1 + endif + zusedu = m + if(doudou) ) then + ibegin = iend + 1 + wbegin = wend + 1 + cycle loop_170 + end if + ! find local spectral diameter of the block + gl = gers( 2*ibegin-1 ) + gu = gers( 2*ibegin ) + do i = ibegin+1 , iend + gl = min( gers( 2*i-1 ), gl ) + gu = max( gers( 2*i ), gu ) + end do + spdiam = gu - gl + ! oldien is the last index of the previous block + oldien = ibegin - 1 + ! calculate the size of the current block + in = iend - ibegin + 1 + ! the number of eigenvalues in the current block + im = wend - wbegin + 1 + ! this is for a 1x1 block + if( ibegin==iend ) then + done = done+1 + z( ibegin, wbegin ) = cmplx( one, zero,KIND=dp) + isuppz( 2*wbegin-1 ) = ibegin + isuppz( 2*wbegin ) = ibegin + w( wbegin ) = w( wbegin ) + sigma + work( wbegin ) = w( wbegin ) + ibegin = iend + 1 + wbegin = wbegin + 1 + cycle loop_170 + end if + ! the desired (shifted) eigenvalues are stored in w(wbegin:wend) + ! note that these can be approximations, in this case, the corresp. + ! entries of werr give the size of the uncertainty interval. + ! the eigenvalue approximations will be refined when necessary as + ! high relative accuracy is required for the computation of the + ! corresponding eigenvectors. + call stdlib_dcopy( im, w( wbegin ), 1,work( wbegin ), 1 ) + ! we store in w the eigenvalue approximations w.r.t. the original + ! matrix t. + do i=1,im + w(wbegin+i-1) = w(wbegin+i-1)+sigma + end do + ! ndepth is the current depth of the representation tree + ndepth = 0 + ! parity is either 1 or 0 + parity = 1 + ! nclus is the number of clusters for the next level of the + ! representation tree, we start with nclus = 1 for the root + nclus = 1 + iwork( iindc1+1 ) = 1 + iwork( iindc1+2 ) = im + ! idone is the number of eigenvectors already computed in the current + ! block + idone = 0 + ! loop while( idonem ) then + info = -2 + return + endif + ! breadth first processing of the current level of the representation + ! tree: oldncl = number of clusters on current level + oldncl = nclus + ! reset nclus to count the number of child clusters + nclus = 0 + parity = 1 - parity + if( parity==0 ) then + oldcls = iindc1 + newcls = iindc2 + else + oldcls = iindc2 + newcls = iindc1 + end if + ! process the clusters on the current level + loop_150: do i = 1, oldncl + j = oldcls + 2*i + ! oldfst, oldlst = first, last index of current cluster. + ! cluster indices start with 1 and are relative + ! to wbegin when accessing w, wgap, werr, z + oldfst = iwork( j-1 ) + oldlst = iwork( j ) + if( ndepth>0 ) then + ! retrieve relatively robust representation (rrr) of cluster + ! that has been computed at the previous level + ! the rrr is stored in z and overwritten once the eigenvectors + ! have been computed or when the cluster is refined + if((dol==1).and.(dou==m)) then + ! get representation from location of the leftmost evalue + ! of the cluster + j = wbegin + oldfst - 1 + else + if(wbegin+oldfst-1dou) then + ! get representation from the right end of z array + j = dou + else + j = wbegin + oldfst - 1 + endif + endif + do k = 1, in - 1 + d( ibegin+k-1 ) = real( z( ibegin+k-1,j ),KIND=dp) + l( ibegin+k-1 ) = real( z( ibegin+k-1,j+1 ),KIND=dp) + end do + d( iend ) = real( z( iend, j ),KIND=dp) + sigma = real( z( iend, j+1 ),KIND=dp) + ! set the corresponding entries in z to zero + call stdlib_zlaset( 'FULL', in, 2, czero, czero,z( ibegin, j), ldz ) + + end if + ! compute dl and dll of current rrr + do j = ibegin, iend-1 + tmp = d( j )*l( j ) + work( indld-1+j ) = tmp + work( indlld-1+j ) = tmp*l( j ) + end do + if( ndepth>0 ) then + ! p and q are index of the first and last eigenvalue to compute + ! within the current block + p = indexw( wbegin-1+oldfst ) + q = indexw( wbegin-1+oldlst ) + ! offset for the arrays work, wgap and werr, i.e., the p-offset + ! through the q-offset elements of these arrays are to be used. + ! offset = p-oldfst + offset = indexw( wbegin ) - 1 + ! perform limited bisection (if necessary) to get approximate + ! eigenvalues to the precision needed. + call stdlib_dlarrb( in, d( ibegin ),work(indlld+ibegin-1),p, q, rtol1, & + rtol2, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ), iwork(& + iindwk ),pivmin, spdiam, in, iinfo ) + if( iinfo/=0 ) then + info = -1 + return + endif + ! we also recompute the extremal gaps. w holds all eigenvalues + ! of the unshifted matrix and must be used for computation + ! of wgap, the entries of work might stem from rrrs with + ! different shifts. the gaps from wbegin-1+oldfst to + ! wbegin-1+oldlst are correctly computed in stdlib_dlarrb. + ! however, we only allow the gaps to become greater since + ! this is what should happen when we decrease werr + if( oldfst>1) then + wgap( wbegin+oldfst-2 ) =max(wgap(wbegin+oldfst-2),w(wbegin+oldfst-1)-& + werr(wbegin+oldfst-1)- w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) ) + + endif + if( wbegin + oldlst -1 < wend ) then + wgap( wbegin+oldlst-1 ) =max(wgap(wbegin+oldlst-1),w(wbegin+oldlst)-& + werr(wbegin+oldlst)- w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) ) + endif + ! each time the eigenvalues in work get refined, we store + ! the newly found approximation with all shifts applied in w + do j=oldfst,oldlst + w(wbegin+j-1) = work(wbegin+j-1)+sigma + end do + end if + ! process the current node. + newfst = oldfst + loop_140: do j = oldfst, oldlst + if( j==oldlst ) then + ! we are at the right end of the cluster, this is also the + ! boundary of the child cluster + newlst = j + else if ( wgap( wbegin + j -1)>=minrgp* abs( work(wbegin + j -1) ) ) & + then + ! the right relative gap is big enough, the child cluster + ! (newfst,..,newlst) is well separated from the following + newlst = j + else + ! inside a child cluster, the relative gap is not + ! big enough. + cycle loop_140 + end if + ! compute size of child cluster found + newsiz = newlst - newfst + 1 + ! newftt is the place in z where the new rrr or the computed + ! eigenvector is to be stored + if((dol==1).and.(dou==m)) then + ! store representation at location of the leftmost evalue + ! of the cluster + newftt = wbegin + newfst - 1 + else + if(wbegin+newfst-1dou) then + ! store representation at the right end of z array + newftt = dou + else + newftt = wbegin + newfst - 1 + endif + endif + if( newsiz>1) then + ! current child is not a singleton but a cluster. + ! compute and store new representation of child. + ! compute left and right cluster gap. + ! lgap and rgap are not computed from work because + ! the eigenvalue approximations may stem from rrrs + ! different shifts. however, w hold all eigenvalues + ! of the unshifted matrix. still, the entries in wgap + ! have to be computed from work since the entries + ! in w might be of the same order so that gaps are not + ! exhibited correctly for very close eigenvalues. + if( newfst==1 ) then + lgap = max( zero,w(wbegin)-werr(wbegin) - vl ) + else + lgap = wgap( wbegin+newfst-2 ) + endif + rgap = wgap( wbegin+newlst-1 ) + ! compute left- and rightmost eigenvalue of child + ! to high precision in order to shift as close + ! as possible and obtain as large relative gaps + ! as possible + do k =1,2 + if(k==1) then + p = indexw( wbegin-1+newfst ) + else + p = indexw( wbegin-1+newlst ) + endif + offset = indexw( wbegin ) - 1 + call stdlib_dlarrb( in, d(ibegin),work( indlld+ibegin-1 ),p,p,rqtol, & + rqtol, offset,work(wbegin),wgap(wbegin),werr(wbegin),work( indwrk ),& + iwork( iindwk ), pivmin, spdiam,in, iinfo ) + end do + if((wbegin+newlst-1dou)) then + ! if the cluster contains no desired eigenvalues + ! skip the computation of that branch of the rep. tree + ! we could skip before the refinement of the extremal + ! eigenvalues of the child, but then the representation + ! tree could be different from the one when nothing is + ! skipped. for this reason we skip at this place. + idone = idone + newlst - newfst + 1 + goto 139 + endif + ! compute rrr of child cluster. + ! note that the new rrr is stored in z + ! stdlib_dlarrf needs lwork = 2*n + call stdlib_dlarrf( in, d( ibegin ), l( ibegin ),work(indld+ibegin-1),& + newfst, newlst, work(wbegin),wgap(wbegin), werr(wbegin),spdiam, lgap, & + rgap, pivmin, tau,work( indin1 ), work( indin2 ),work( indwrk ), iinfo ) + + ! in the complex case, stdlib_dlarrf cannot write + ! the new rrr directly into z and needs an intermediate + ! workspace + do k = 1, in-1 + z( ibegin+k-1, newftt ) =cmplx( work( indin1+k-1 ), zero,KIND=dp) + + z( ibegin+k-1, newftt+1 ) =cmplx( work( indin2+k-1 ), zero,KIND=dp) + + end do + z( iend, newftt ) =cmplx( work( indin1+in-1 ), zero,KIND=dp) + if( iinfo==0 ) then + ! a new rrr for the cluster was found by stdlib_dlarrf + ! update shift and store it + ssigma = sigma + tau + z( iend, newftt+1 ) = cmplx( ssigma, zero,KIND=dp) + ! work() are the midpoints and werr() the semi-width + ! note that the entries in w are unchanged. + do k = newfst, newlst + fudge =three*eps*abs(work(wbegin+k-1)) + work( wbegin + k - 1 ) =work( wbegin + k - 1) - tau + fudge = fudge +four*eps*abs(work(wbegin+k-1)) + ! fudge errors + werr( wbegin + k - 1 ) =werr( wbegin + k - 1 ) + fudge + ! gaps are not fudged. provided that werr is small + ! when eigenvalues are close, a zero gap indicates + ! that a new representation is needed for resolving + ! the cluster. a fudge could lead to a wrong decision + ! of judging eigenvalues 'separated' which in + ! reality are not. this could have a negative impact + ! on the orthogonality of the computed eigenvectors. + end do + nclus = nclus + 1 + k = newcls + 2*nclus + iwork( k-1 ) = newfst + iwork( k ) = newlst + else + info = -2 + return + endif + else + ! compute eigenvector of singleton + iter = 0 + tol = four * log(real(in,KIND=dp)) * eps + k = newfst + windex = wbegin + k - 1 + windmn = max(windex - 1,1) + windpl = min(windex + 1,m) + lambda = work( windex ) + done = done + 1 + ! check if eigenvector computation is to be skipped + if((windexdou)) then + eskip = .true. + goto 125 + else + eskip = .false. + endif + left = work( windex ) - werr( windex ) + right = work( windex ) + werr( windex ) + indeig = indexw( windex ) + ! note that since we compute the eigenpairs for a child, + ! all eigenvalue approximations are w.r.t the same shift. + ! in this case, the entries in work should be used for + ! computing the gaps since they exhibit even very small + ! differences in the eigenvalues, as opposed to the + ! entries in w which might "look" the same. + if( k == 1) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vl, the formula + ! lgap = max( zero, (sigma - vl) + lambda ) + ! can lead to an overestimation of the left gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small left gap. + lgap = eps*max(abs(left),abs(right)) + else + lgap = wgap(windmn) + endif + if( k == im) then + ! in the case range='i' and with not much initial + ! accuracy in lambda and vu, the formula + ! can lead to an overestimation of the right gap and + ! thus to inadequately early rqi 'convergence'. + ! prevent this by forcing a small right gap. + rgap = eps*max(abs(left),abs(right)) + else + rgap = wgap(windex) + endif + gap = min( lgap, rgap ) + if(( k == 1).or.(k == im)) then + ! the eigenvector support can become wrong + ! because significant entries could be cut off due to a + ! large gaptol parameter in lar1v. prevent this. + gaptol = zero + else + gaptol = gap * eps + endif + isupmn = in + isupmx = 1 + ! update wgap so that it holds the minimum gap + ! to the left or the right. this is crucial in the + ! case where bisection is used to ensure that the + ! eigenvalue is refined up to the required precision. + ! the correct value is restored afterwards. + savgap = wgap(windex) + wgap(windex) = gap + ! we want to use the rayleigh quotient correction + ! as often as possible since it converges quadratically + ! when we are close enough to the desired eigenvalue. + ! however, the rayleigh quotient can have the wrong sign + ! and lead us away from the desired eigenvalue. in this + ! case, the best we can do is to use bisection. + usedbs = .false. + usedrq = .false. + ! bisection is initially turned off unless it is forced + needbs = .not.tryrqc + 120 continue + ! check if bisection should be used to refine eigenvalue + if(needbs) then + ! take the bisection as new iterate + usedbs = .true. + itmp1 = iwork( iindr+windex ) + offset = indexw( wbegin ) - 1 + call stdlib_dlarrb( in, d(ibegin),work(indlld+ibegin-1),indeig,& + indeig,zero, two*eps, offset,work(wbegin),wgap(wbegin),werr(wbegin),& + work( indwrk ),iwork( iindwk ), pivmin, spdiam,itmp1, iinfo ) + if( iinfo/=0 ) then + info = -3 + return + endif + lambda = work( windex ) + ! reset twist index from inaccurate lambda to + ! force computation of true mingma + iwork( iindr+windex ) = 0 + endif + ! given lambda, compute the eigenvector. + call stdlib_zlar1v( in, 1, in, lambda, d( ibegin ),l( ibegin ), work(& + indld+ibegin-1),work(indlld+ibegin-1),pivmin, gaptol, z( ibegin, windex & + ),.not.usedbs, negcnt, ztz, mingma,iwork( iindr+windex ), isuppz( & + 2*windex-1 ),nrminv, resid, rqcorr, work( indwrk ) ) + if(iter == 0) then + bstres = resid + bstw = lambda + elseif(residtol*gap .and. abs( rqcorr )>rqtol*abs( lambda ) .and. .not. & + usedbs)then + ! we need to check that the rqcorr update doesn't + ! move the eigenvalue away from the desired one and + ! towards a neighbor. -> protection with bisection + if(indeig<=negcnt) then + ! the wanted eigenvalue lies to the left + sgndef = -one + else + ! the wanted eigenvalue lies to the right + sgndef = one + endif + ! we only use the rqcorr if it improves the + ! the iterate reasonably. + if( ( rqcorr*sgndef>=zero ).and.( lambda + rqcorr<= right).and.( & + lambda + rqcorr>= left)) then + usedrq = .true. + ! store new midpoint of bisection interval in work + if(sgndef==one) then + ! the current lambda is on the left of the true + ! eigenvalue + left = lambda + ! we prefer to assume that the error estimate + ! is correct. we could make the interval not + ! as a bracket but to be modified if the rqcorr + ! chooses to. in this case, the right side should + ! be modified as follows: + ! right = max(right, lambda + rqcorr) + else + ! the current lambda is on the right of the true + ! eigenvalue + right = lambda + ! see comment about assuming the error estimate is + ! correct above. + ! left = min(left, lambda + rqcorr) + endif + work( windex ) =half * (right + left) + ! take rqcorr since it has the correct sign and + ! improves the iterate reasonably + lambda = lambda + rqcorr + ! update width of error interval + werr( windex ) =half * (right-left) + else + needbs = .true. + endif + if(right-leftzto) then + do ii = zto+1,isupmx + z( ii, windex ) = zero + end do + endif + call stdlib_zdscal( zto-zfrom+1, nrminv,z( zfrom, windex ), 1 ) + 125 continue + ! update w + w( windex ) = lambda+sigma + ! recompute the gaps on the left and right + ! but only allow them to become larger and not + ! smaller (which can only happen through "bad" + ! cancellation and doesn't reflect the theory + ! where the initial gaps are underestimated due + ! to werr being too crude.) + if(.not.eskip) then + if( k>1) then + wgap( windmn ) = max( wgap(windmn),w(windex)-werr(windex)- w(& + windmn)-werr(windmn) ) + endif + if( windex ZLATDF: computes the contribution to the reciprocal Dif-estimate + !> by solving for x in Z * x = b, where b is chosen such that the norm + !> of x is as large as possible. It is assumed that LU decomposition + !> of Z has been computed by ZGETC2. On entry RHS = f holds the + !> contribution from earlier solved sub-systems, and on return RHS = x. + !> The factorization of Z returned by ZGETC2 has the form + !> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower + !> triangular with unit diagonal elements and U is upper triangular. + + pure subroutine stdlib_zlatdf( ijob, n, z, ldz, rhs, rdsum, rdscal, ipiv,jpiv ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ijob, ldz, n + real(dp), intent(inout) :: rdscal, rdsum + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*), jpiv(*) + complex(dp), intent(inout) :: rhs(*), z(ldz,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxdim = 2 + + + + ! Local Scalars + integer(ilp) :: i, info, j, k + real(dp) :: rtemp, scale, sminu, splus + complex(dp) :: bm, bp, pmone, temp + ! Local Arrays + real(dp) :: rwork(maxdim) + complex(dp) :: work(4*maxdim), xm(maxdim), xp(maxdim) + ! Intrinsic Functions + intrinsic :: abs,real,sqrt + ! Executable Statements + if( ijob/=2 ) then + ! apply permutations ipiv to rhs + call stdlib_zlaswp( 1, rhs, ldz, 1, n-1, ipiv, 1 ) + ! solve for l-part choosing rhs either to +1 or -1. + pmone = -cone + loop_10: do j = 1, n - 1 + bp = rhs( j ) + cone + bm = rhs( j ) - cone + splus = one + ! lockahead for l- part rhs(1:n-1) = +-1 + ! splus and smin computed more efficiently than in bsolve[1]. + splus = splus + real( stdlib_zdotc( n-j, z( j+1, j ), 1, z( j+1,j ), 1 ),KIND=dp) + + sminu = real( stdlib_zdotc( n-j, z( j+1, j ), 1, rhs( j+1 ), 1 ),KIND=dp) + splus = splus*real( rhs( j ),KIND=dp) + if( splus>sminu ) then + rhs( j ) = bp + else if( sminu>splus ) then + rhs( j ) = bm + else + ! in this case the updating sums are equal and we can + ! choose rhs(j) +1 or -1. the first time this happens we + ! choose -1, thereafter +1. this is a simple way to get + ! good estimates of matrices like byers well-known example + ! (see [1]). (not done in bsolve.) + rhs( j ) = rhs( j ) + pmone + pmone = cone + end if + ! compute the remaining r.h.s. + temp = -rhs( j ) + call stdlib_zaxpy( n-j, temp, z( j+1, j ), 1, rhs( j+1 ), 1 ) + end do loop_10 + ! solve for u- part, lockahead for rhs(n) = +-1. this is not done + ! in bsolve and will hopefully give us a better estimate because + ! any ill-conditioning of the original matrix is transferred to u + ! and not to l. u(n, n) is an approximation to sigma_min(lu). + call stdlib_zcopy( n-1, rhs, 1, work, 1 ) + work( n ) = rhs( n ) + cone + rhs( n ) = rhs( n ) - cone + splus = zero + sminu = zero + do i = n, 1, -1 + temp = cone / z( i, i ) + work( i ) = work( i )*temp + rhs( i ) = rhs( i )*temp + do k = i + 1, n + work( i ) = work( i ) - work( k )*( z( i, k )*temp ) + rhs( i ) = rhs( i ) - rhs( k )*( z( i, k )*temp ) + end do + splus = splus + abs( work( i ) ) + sminu = sminu + abs( rhs( i ) ) + end do + if( splus>sminu )call stdlib_zcopy( n, work, 1, rhs, 1 ) + ! apply the permutations jpiv to the computed solution (rhs) + call stdlib_zlaswp( 1, rhs, ldz, 1, n-1, jpiv, -1 ) + ! compute the sum of squares + call stdlib_zlassq( n, rhs, 1, rdscal, rdsum ) + return + end if + ! entry ijob = 2 + ! compute approximate nullvector xm of z + call stdlib_zgecon( 'I', n, z, ldz, one, rtemp, work, rwork, info ) + call stdlib_zcopy( n, work( n+1 ), 1, xm, 1 ) + ! compute rhs + call stdlib_zlaswp( 1, xm, ldz, 1, n-1, ipiv, -1 ) + temp = cone / sqrt( stdlib_zdotc( n, xm, 1, xm, 1 ) ) + call stdlib_zscal( n, temp, xm, 1 ) + call stdlib_zcopy( n, xm, 1, xp, 1 ) + call stdlib_zaxpy( n, cone, rhs, 1, xp, 1 ) + call stdlib_zaxpy( n, -cone, xm, 1, rhs, 1 ) + call stdlib_zgesc2( n, z, ldz, rhs, ipiv, jpiv, scale ) + call stdlib_zgesc2( n, z, ldz, xp, ipiv, jpiv, scale ) + if( stdlib_dzasum( n, xp, 1 )>stdlib_dzasum( n, rhs, 1 ) )call stdlib_zcopy( n, xp, 1, & + rhs, 1 ) + ! compute the sum of squares + call stdlib_zlassq( n, rhs, 1, rdscal, rdsum ) + return + end subroutine stdlib_zlatdf + + !> ZLAUNHR_COL_GETRFNP: computes the modified LU factorization without + !> pivoting of a complex general M-by-N matrix A. The factorization has + !> the form: + !> A - S = L * U, + !> where: + !> S is a m-by-n diagonal sign matrix with the diagonal D, so that + !> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed + !> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing + !> i-1 steps of Gaussian elimination. This means that the diagonal + !> element at each step of "modified" Gaussian elimination is + !> at least one in absolute value (so that division-by-zero not + !> not possible during the division by the diagonal element); + !> L is a M-by-N lower triangular matrix with unit diagonal elements + !> (lower trapezoidal if M > N); + !> and U is a M-by-N upper triangular matrix + !> (upper trapezoidal if M < N). + !> This routine is an auxiliary routine used in the Householder + !> reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is + !> applied to an M-by-N matrix A with orthonormal columns, where each + !> element is bounded by one in absolute value. With the choice of + !> the matrix S above, one can show that the diagonal element at each + !> step of Gaussian elimination is the largest (in absolute value) in + !> the column on or below the diagonal, so that no pivoting is required + !> for numerical stability [1]. + !> For more details on the Householder reconstruction algorithm, + !> including the modified LU factorization, see [1]. + !> This is the blocked right-looking version of the algorithm, + !> calling Level 3 BLAS to update the submatrix. To factorize a block, + !> this routine calls the recursive routine ZLAUNHR_COL_GETRFNP2. + !> [1] "Reconstructing Householder vectors from tall-skinny QR", + !> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen, + !> E. Solomonik, J. Parallel Distrib. Comput., + !> vol. 85, pp. 3-31, 2015. + + pure subroutine stdlib_zlaunhr_col_getrfnp( m, n, a, lda, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_zlaunhr_col_getrfnp2( m, n, a, lda, d, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks. + call stdlib_zlaunhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo ) + + if( j+jb<=n ) then + ! compute block row of u. + call stdlib_ztrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_zlaunhr_col_getrfnp + + !> ZPBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and banded, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_zpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b,ldb, x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, l, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_zpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zpbtrs( uplo, n, kd, 1, afb, ldafb, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zpbrfs + + !> ZPBTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite band matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + + pure subroutine stdlib_zpbtrf( uplo, n, kd, ab, ldab, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + ! Array Arguments + complex(dp), intent(inout) :: ab(ldab,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 32 + integer(ilp), parameter :: ldwork = nbmax+1 + + + + ! Local Scalars + integer(ilp) :: i, i2, i3, ib, ii, j, jj, nb + ! Local Arrays + complex(dp) :: work(ldwork,nbmax) + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + info = 0 + if( ( .not.stdlib_lsame( uplo, 'U' ) ) .and.( .not.stdlib_lsame( uplo, 'L' ) ) ) & + then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( ldabkd ) then + ! use unblocked code + call stdlib_zpbtf2( uplo, n, kd, ab, ldab, info ) + else + ! use blocked code + if( stdlib_lsame( uplo, 'U' ) ) then + ! compute the cholesky factorization of a hermitian band + ! matrix, given the upper triangle of the matrix in band + ! storage. + ! zero the upper triangle of the work array. + do j = 1, nb + do i = 1, j - 1 + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_70: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_zpotf2( uplo, ib, ab( kd+1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 a12 a13 + ! a22 a23 + ! a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a12, a22 and + ! a23 are empty if ib = kd. the upper triangle of a13 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a12 + call stdlib_ztrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i2, cone,ab( kd+1, i ), ldab-1,ab( kd+1-ib, i+ib ), ldab-1 ) + ! update a22 + call stdlib_zherk( 'UPPER', 'CONJUGATE TRANSPOSE', i2, ib,-one, ab( kd+& + 1-ib, i+ib ), ldab-1, one,ab( kd+1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the lower triangle of a13 into the work array. + do jj = 1, i3 + do ii = jj, ib + work( ii, jj ) = ab( ii-jj+1, jj+i+kd-1 ) + end do + end do + ! update a13 (in the work array). + call stdlib_ztrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', & + ib, i3, cone,ab( kd+1, i ), ldab-1, work, ldwork ) + ! update a23 + if( i2>0 )call stdlib_zgemm( 'CONJUGATE TRANSPOSE','NO TRANSPOSE', i2, & + i3, ib, -cone,ab( kd+1-ib, i+ib ), ldab-1, work,ldwork, cone, ab( 1+ib, & + i+kd ),ldab-1 ) + ! update a33 + call stdlib_zherk( 'UPPER', 'CONJUGATE TRANSPOSE', i3, ib,-one, work, & + ldwork, one,ab( kd+1, i+kd ), ldab-1 ) + ! copy the lower triangle of a13 back into place. + do jj = 1, i3 + do ii = jj, ib + ab( ii-jj+1, jj+i+kd-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_70 + else + ! compute the cholesky factorization of a hermitian band + ! matrix, given the lower triangle of the matrix in band + ! storage. + ! zero the lower triangle of the work array. + do j = 1, nb + do i = j + 1, nb + work( i, j ) = zero + end do + end do + ! process the band matrix one diagonal block at a time. + loop_140: do i = 1, n, nb + ib = min( nb, n-i+1 ) + ! factorize the diagonal block + call stdlib_zpotf2( uplo, ib, ab( 1, i ), ldab-1, ii ) + if( ii/=0 ) then + info = i + ii - 1 + go to 150 + end if + if( i+ib<=n ) then + ! update the relevant part of the trailing submatrix. + ! if a11 denotes the diagonal block which has just been + ! factorized, then we need to update the remaining + ! blocks in the diagram: + ! a11 + ! a21 a22 + ! a31 a32 a33 + ! the numbers of rows and columns in the partitioning + ! are ib, i2, i3 respectively. the blocks a21, a22 and + ! a32 are empty if ib = kd. the lower triangle of a31 + ! lies outside the band. + i2 = min( kd-ib, n-i-ib+1 ) + i3 = min( ib, n-i-kd+1 ) + if( i2>0 ) then + ! update a21 + call stdlib_ztrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i2,ib, cone, ab( 1, i ), ldab-1,ab( 1+ib, i ), ldab-1 ) + ! update a22 + call stdlib_zherk( 'LOWER', 'NO TRANSPOSE', i2, ib, -one,ab( 1+ib, i ), & + ldab-1, one,ab( 1, i+ib ), ldab-1 ) + end if + if( i3>0 ) then + ! copy the upper triangle of a31 into the work array. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + work( ii, jj ) = ab( kd+1-jj+ii, jj+i-1 ) + end do + end do + ! update a31 (in the work array). + call stdlib_ztrsm( 'RIGHT', 'LOWER','CONJUGATE TRANSPOSE', 'NON-UNIT', & + i3,ib, cone, ab( 1, i ), ldab-1, work,ldwork ) + ! update a32 + if( i2>0 )call stdlib_zgemm( 'NO TRANSPOSE','CONJUGATE TRANSPOSE', i3, & + i2, ib,-cone, work, ldwork, ab( 1+ib, i ),ldab-1, cone, ab( 1+kd-ib, i+& + ib ),ldab-1 ) + ! update a33 + call stdlib_zherk( 'LOWER', 'NO TRANSPOSE', i3, ib, -one,work, ldwork, & + one, ab( 1, i+kd ),ldab-1 ) + ! copy the upper triangle of a31 back into place. + do jj = 1, ib + do ii = 1, min( jj, i3 ) + ab( kd+1-jj+ii, jj+i-1 ) = work( ii, jj ) + end do + end do + end if + end if + end do loop_140 + end if + end if + return + 150 continue + return + end subroutine stdlib_zpbtrf + + !> ZPFTRS: solves a system of linear equations A*X = B with a Hermitian + !> positive definite matrix A using the Cholesky factorization + !> A = U**H*U or A = L*L**H computed by ZPFTRF. + + pure subroutine stdlib_zpftrs( transr, uplo, n, nrhs, a, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(dp), intent(in) :: a(0:*) + complex(dp), intent(inout) :: b(ldb,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, normaltransr + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb ZPORFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite, + !> and provides error bounds and backward error estimates for the + !> solution. + + pure subroutine stdlib_zporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zpotrs( uplo, n, 1, af, ldaf, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_zpotrs( uplo, n, 1, af, ldaf, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zpotrs( uplo, n, 1, af, ldaf, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zporfs + + !> ZPOTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zpotrf( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) then + ! use unblocked code. + call stdlib_zpotrf2( uplo, n, a, lda, info ) + else + ! use blocked code. + if( upper ) then + ! compute the cholesky factorization a = u**h *u. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_zherk( 'UPPER', 'CONJUGATE TRANSPOSE', jb, j-1,-one, a( 1, j ), & + lda, one, a( j, j ), lda ) + call stdlib_zpotrf2( 'UPPER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block row. + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'NO TRANSPOSE', jb,n-j-jb+1, j-1,& + -cone, a( 1, j ), lda,a( 1, j+jb ), lda, cone, a( j, j+jb ),lda ) + call stdlib_ztrsm( 'LEFT', 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT', jb, & + n-j-jb+1, cone, a( j, j ),lda, a( j, j+jb ), lda ) + end if + end do + else + ! compute the cholesky factorization a = l*l**h. + do j = 1, n, nb + ! update and factorize the current diagonal block and test + ! for non-positive-definiteness. + jb = min( nb, n-j+1 ) + call stdlib_zherk( 'LOWER', 'NO TRANSPOSE', jb, j-1, -one,a( j, 1 ), lda, one,& + a( j, j ), lda ) + call stdlib_zpotrf2( 'LOWER', jb, a( j, j ), lda, info ) + if( info/=0 )go to 30 + if( j+jb<=n ) then + ! compute the current block column. + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j-jb+1, jb, j-1,& + -cone, a( j+jb, 1 ),lda, a( j, 1 ), lda, cone, a( j+jb, j ),lda ) + call stdlib_ztrsm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','NON-UNIT', n-j-& + jb+1, jb, cone, a( j, j ),lda, a( j+jb, j ), lda ) + end if + end do + end if + end if + go to 40 + 30 continue + info = info + j - 1 + 40 continue + return + end subroutine stdlib_zpotrf + + !> ZPOTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPOTRF. + + pure subroutine stdlib_zpotri( uplo, n, a, lda, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 )return + ! form inv(u) * inv(u)**h or inv(l)**h * inv(l). + call stdlib_zlauum( uplo, n, a, lda, info ) + return + end subroutine stdlib_zpotri + + !> ZPPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_zpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ==================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zpptrs( uplo, n, 1, afp, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_zpptrs( uplo, n, 1, afp, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zpptrs( uplo, n, 1, afp, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zpprfs + + !> ZPPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_zppsv( uplo, n, nrhs, ap, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZPPSVX: uses the Cholesky factorization A = U**H * U or A = L * L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix stored in + !> packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zppsvx( fact, uplo, n, nrhs, ap, afp, equed, s, b, ldb,x, ldx, rcond, ferr,& + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(inout) :: s(*) + complex(dp), intent(inout) :: afp(*), ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(dp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( stdlib_lsame( fact, 'F' ) .and. .not.( rcequ .or. stdlib_lsame( equed, 'N' ) )& + ) then + info = -7 + else + if( rcequ ) then + smin = bignum + smax = zero + do j = 1, n + smin = min( smin, s( j ) ) + smax = max( smax, s( j ) ) + end do + if( smin<=zero ) then + info = -8 + else if( n>0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zppcon( uplo, n, afp, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zpptrs( uplo, n, nrhs, afp, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_zpprfs( uplo, n, nrhs, ap, afp, b, ldb, x, ldx, ferr, berr,work, rwork, & + info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPPTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPPTRF. + + pure subroutine stdlib_zpptri( uplo, n, ap, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: ap(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: j, jc, jj, jjn + real(dp) :: ajj + ! Intrinsic Functions + intrinsic :: real + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPPTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_ztptri( uplo, 'NON-UNIT', n, ap, info ) + if( info>0 )return + if( upper ) then + ! compute the product inv(u) * inv(u)**h. + jj = 0 + do j = 1, n + jc = jj + 1 + jj = jj + j + if( j>1 )call stdlib_zhpr( 'UPPER', j-1, one, ap( jc ), 1, ap ) + ajj = real( ap( jj ),KIND=dp) + call stdlib_zdscal( j, ajj, ap( jc ), 1 ) + end do + else + ! compute the product inv(l)**h * inv(l). + jj = 1 + do j = 1, n + jjn = jj + n - j + 1 + ap( jj ) = real( stdlib_zdotc( n-j+1, ap( jj ), 1, ap( jj ), 1 ),KIND=dp) + if( j ZPTEQR: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric positive definite tridiagonal matrix by first factoring the + !> matrix using DPTTRF and then calling ZBDSQR to compute the singular + !> values of the bidiagonal factor. + !> This routine computes the eigenvalues of the positive definite + !> tridiagonal matrix to high relative accuracy. This means that if the + !> eigenvalues range over many orders of magnitude in size, then the + !> small eigenvalues and corresponding eigenvectors will be computed + !> more accurately than, for example, with the standard QR method. + !> The eigenvectors of a full or band positive definite Hermitian matrix + !> can also be found if ZHETRD, ZHPTRD, or ZHBTRD has been used to + !> reduce this matrix to tridiagonal form. (The reduction to + !> tridiagonal form, however, may preclude the possibility of obtaining + !> high relative accuracy in the small eigenvalues of the original + !> matrix, if these eigenvalues range over many orders of magnitude.) + + pure subroutine stdlib_zpteqr( compz, n, d, e, z, ldz, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + ! ==================================================================== + + ! Local Arrays + complex(dp) :: c(1,1), vt(1,1) + ! Local Scalars + integer(ilp) :: i, icompz, nru + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or. ( icompz>0 .and. ldz0 )z( 1, 1 ) = cone + return + end if + if( icompz==2 )call stdlib_zlaset( 'FULL', n, n, czero, cone, z, ldz ) + ! call stdlib_dpttrf to factor the matrix. + call stdlib_dpttrf( n, d, e, info ) + if( info/=0 )return + do i = 1, n + d( i ) = sqrt( d( i ) ) + end do + do i = 1, n - 1 + e( i ) = e( i )*d( i ) + end do + ! call stdlib_zbdsqr to compute the singular values/vectors of the + ! bidiagonal factor. + if( icompz>0 ) then + nru = n + else + nru = 0 + end if + call stdlib_zbdsqr( 'LOWER', n, 0, nru, 0, d, e, vt, 1, z, ldz, c, 1,work, info ) + + ! square the singular values. + if( info==0 ) then + do i = 1, n + d( i ) = d( i )*d( i ) + end do + else + info = n + info + end if + return + end subroutine stdlib_zpteqr + + !> ZPTTRS: solves a tridiagonal system of the form + !> A * X = B + !> using the factorization A = U**H *D* U or A = L*D*L**H computed by ZPTTRF. + !> D is a diagonal matrix specified in the vector D, U (or L) is a unit + !> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in + !> the vector E, and X and B are N by NRHS matrices. + + pure subroutine stdlib_zpttrs( uplo, n, nrhs, d, e, b, ldb, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(in) :: d(*) + complex(dp), intent(inout) :: b(ldb,*) + complex(dp), intent(in) :: e(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: iuplo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments. + info = 0 + upper = ( uplo=='U' .or. uplo=='U' ) + if( .not.upper .and. .not.( uplo=='L' .or. uplo=='L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb=nrhs ) then + call stdlib_zptts2( iuplo, n, nrhs, d, e, b, ldb ) + else + do j = 1, nrhs, nb + jb = min( nrhs-j+1, nb ) + call stdlib_zptts2( iuplo, n, jb, d, e, b( 1, j ), ldb ) + end do + end if + return + end subroutine stdlib_zpttrs + + !> ZSPCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric packed matrix A using the + !> factorization A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zspcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_zsptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zspcon + + !> ZSPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_zsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_zsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zsptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zsprfs + + !> ZSPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is symmetric and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_zspsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZSPSVX: uses the diagonal pivoting factorization A = U*D*U**T or + !> A = L*D*L**T to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zspsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(inout) :: afp(*) + complex(dp), intent(in) :: ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlansp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zspcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zsptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zsprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZSTEMR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> Depending on the number of desired eigenvalues, these are computed either + !> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are + !> computed by the use of various suitable L D L^T factorizations near clusters + !> of close eigenvalues (referred to as RRRs, Relatively Robust + !> Representations). An informal sketch of the algorithm follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> For more details, see: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Further Details + !> 1.ZSTEMR works only on machines which follow IEEE-754 + !> floating-point standard in their handling of infinities and NaNs. + !> This permits the use of efficient inner loops avoiding a check for + !> zero divisors. + !> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to + !> real symmetric tridiagonal form. + !> (Any complex Hermitean tridiagonal matrix has real values on its diagonal + !> and potentially complex numbers on its off-diagonals. By applying a + !> similarity transform with an appropriate diagonal matrix + !> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean + !> matrix can be transformed into a real symmetric matrix and complex + !> arithmetic can be entirely avoided.) + !> While the eigenvectors of the real symmetric tridiagonal matrix are real, + !> the eigenvectors of original complex Hermitean matrix have complex entries + !> in general. + !> Since LAPACK drivers overwrite the matrix data with the eigenvectors, + !> ZSTEMR accepts complex workspace to facilitate interoperability + !> with ZUNMTR or ZUPMTR. + + pure subroutine stdlib_zstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, nzc, & + isuppz, tryrac, work, lwork,iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + logical(lk), intent(inout) :: tryrac + integer(ilp), intent(in) :: il, iu, ldz, nzc, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*) + complex(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: minrgp = 1.0e-3_dp + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, valeig, wantz, zquery + integer(ilp) :: i, ibegin, iend, ifirst, iil, iindbl, iindw, iindwk, iinfo, iinspl, & + iiu, ilast, in, indd, inde2, inderr, indgp, indgrs, indwrk, itmp, itmp2, j, jblk, jj, & + liwmin, lwmin, nsplit, nzcmin, offset, wbegin, wend + real(dp) :: bignum, cs, eps, pivmin, r1, r2, rmax, rmin, rtol1, rtol2, safmin, scale, & + smlnum, sn, thresh, tmp, tnrm, wl, wu + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ).or.( liwork==-1 ) ) + zquery = ( nzc==-1 ) + ! stdlib_dstemr needs work of size 6*n, iwork of size 3*n. + ! in addition, stdlib_dlarre needs work of size 6*n, iwork of size 5*n. + ! furthermore, stdlib_zlarrv needs work of size 12*n, iwork of size 7*n. + if( wantz ) then + lwmin = 18*n + liwmin = 10*n + else + ! need less workspace if only the eigenvalues are wanted + lwmin = 12*n + liwmin = 8*n + endif + wl = zero + wu = zero + iil = 0 + iiu = 0 + nsplit = 0 + if( valeig ) then + ! we do not reference vl, vu in the cases range = 'i','a' + ! the interval (wl, wu] contains all the wanted eigenvalues. + ! it is either given by the user or computed in stdlib_dlarre. + wl = vl + wu = vu + elseif( indeig ) then + ! we do not reference il, iu in the cases range = 'v','a' + iil = il + iiu = iu + endif + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( valeig .and. n>0 .and. wu<=wl ) then + info = -7 + else if( indeig .and. ( iil<1 .or. iil>n ) ) then + info = -8 + else if( indeig .and. ( iiun ) ) then + info = -9 + else if( ldz<1 .or. ( wantz .and. ldz=d( 1 ) ) then + m = 1 + w( 1 ) = d( 1 ) + end if + end if + if( wantz.and.(.not.zquery) ) then + z( 1, 1 ) = one + isuppz(1) = 1 + isuppz(2) = 1 + end if + return + end if + if( n==2 ) then + if( .not.wantz ) then + call stdlib_dlae2( d(1), e(1), d(2), r1, r2 ) + else if( wantz.and.(.not.zquery) ) then + call stdlib_dlaev2( d(1), e(1), d(2), r1, r2, cs, sn ) + end if + if( alleig.or.(valeig.and.(r2>wl).and.(r2<=wu)).or.(indeig.and.(iil==1)) ) & + then + m = m+1 + w( m ) = r2 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = -sn + z( 2, m ) = cs + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + if( alleig.or.(valeig.and.(r1>wl).and.(r1<=wu)).or.(indeig.and.(iiu==2)) ) & + then + m = m+1 + w( m ) = r1 + if( wantz.and.(.not.zquery) ) then + z( 1, m ) = cs + z( 2, m ) = sn + ! note: at most one of sn and cs can be zero. + if (sn/=zero) then + if (cs/=zero) then + isuppz(2*m-1) = 1 + isuppz(2*m) = 2 + else + isuppz(2*m-1) = 1 + isuppz(2*m) = 1 + end if + else + isuppz(2*m-1) = 2 + isuppz(2*m) = 2 + end if + endif + endif + else + ! continue with general n + indgrs = 1 + inderr = 2*n + 1 + indgp = 3*n + 1 + indd = 4*n + 1 + inde2 = 5*n + 1 + indwrk = 6*n + 1 + iinspl = 1 + iindbl = n + 1 + iindw = 2*n + 1 + iindwk = 3*n + 1 + ! scale matrix to allowable range, if necessary. + ! the allowable range is related to the pivmin parameter; see the + ! comments in stdlib_dlarrd. the preference for scaling small values + ! up is heuristic; we expect users' matrices not to be close to the + ! rmax threshold. + scale = one + tnrm = stdlib_dlanst( 'M', n, d, e ) + if( tnrm>zero .and. tnrmrmax ) then + scale = rmax / tnrm + end if + if( scale/=one ) then + call stdlib_dscal( n, scale, d, 1 ) + call stdlib_dscal( n-1, scale, e, 1 ) + tnrm = tnrm*scale + if( valeig ) then + ! if eigenvalues in interval have to be found, + ! scale (wl, wu] accordingly + wl = wl*scale + wu = wu*scale + endif + end if + ! compute the desired eigenvalues of the tridiagonal after splitting + ! into smaller subblocks if the corresponding off-diagonal elements + ! are small + ! thresh is the splitting parameter for stdlib_dlarre + ! a negative thresh forces the old splitting criterion based on the + ! size of the off-diagonal. a positive thresh switches to splitting + ! which preserves relative accuracy. + if( tryrac ) then + ! test whether the matrix warrants the more expensive relative approach. + call stdlib_dlarrr( n, d, e, iinfo ) + else + ! the user does not care about relative accurately eigenvalues + iinfo = -1 + endif + ! set the splitting criterion + if (iinfo==0) then + thresh = eps + else + thresh = -eps + ! relative accuracy is desired but t does not guarantee it + tryrac = .false. + endif + if( tryrac ) then + ! copy original diagonal, needed to guarantee relative accuracy + call stdlib_dcopy(n,d,1,work(indd),1) + endif + ! store the squares of the offdiagonal values of t + do j = 1, n-1 + work( inde2+j-1 ) = e(j)**2 + end do + ! set the tolerance parameters for bisection + if( .not.wantz ) then + ! stdlib_dlarre computes the eigenvalues to full precision. + rtol1 = four * eps + rtol2 = four * eps + else + ! stdlib_dlarre computes the eigenvalues to less than full precision. + ! stdlib_zlarrv will refine the eigenvalue approximations, and we only + ! need less accurate initial bisection in stdlib_dlarre. + ! note: these settings do only affect the subset case and stdlib_dlarre + rtol1 = sqrt(eps) + rtol2 = max( sqrt(eps)*5.0e-3_dp, four * eps ) + endif + call stdlib_dlarre( range, n, wl, wu, iil, iiu, d, e,work(inde2), rtol1, rtol2, & + thresh, nsplit,iwork( iinspl ), m, w, work( inderr ),work( indgp ), iwork( iindbl ),& + iwork( iindw ), work( indgrs ), pivmin,work( indwrk ), iwork( iindwk ), iinfo ) + + if( iinfo/=0 ) then + info = 10 + abs( iinfo ) + return + end if + ! note that if range /= 'v', stdlib_dlarre computes bounds on the desired + ! part of the spectrum. all desired eigenvalues are contained in + ! (wl,wu] + if( wantz ) then + ! compute the desired eigenvectors corresponding to the computed + ! eigenvalues + call stdlib_zlarrv( n, wl, wu, d, e,pivmin, iwork( iinspl ), m,1, m, minrgp, & + rtol1, rtol2,w, work( inderr ), work( indgp ), iwork( iindbl ),iwork( iindw ), & + work( indgrs ), z, ldz,isuppz, work( indwrk ), iwork( iindwk ), iinfo ) + if( iinfo/=0 ) then + info = 20 + abs( iinfo ) + return + end if + else + ! stdlib_dlarre computes eigenvalues of the (shifted) root representation + ! stdlib_zlarrv returns the eigenvalues of the unshifted matrix. + ! however, if the eigenvectors are not desired by the user, we need + ! to apply the corresponding shifts from stdlib_dlarre to obtain the + ! eigenvalues of the original matrix. + do j = 1, m + itmp = iwork( iindbl+j-1 ) + w( j ) = w( j ) + e( iwork( iinspl+itmp-1 ) ) + end do + end if + if ( tryrac ) then + ! refine computed eigenvalues so that they are relatively accurate + ! with respect to the original matrix t. + ibegin = 1 + wbegin = 1 + loop_39: do jblk = 1, iwork( iindbl+m-1 ) + iend = iwork( iinspl+jblk-1 ) + in = iend - ibegin + 1 + wend = wbegin - 1 + ! check if any eigenvalues have to be refined in this block + 36 continue + if( wend1 .or. n==2 ) then + if( .not. wantz ) then + call stdlib_dlasrt( 'I', m, w, iinfo ) + if( iinfo/=0 ) then + info = 3 + return + end if + else + do j = 1, m - 1 + i = 0 + tmp = w( j ) + do jj = j + 1, m + if( w( jj ) ZSYCON: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zsycon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_zsytrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zsycon + + !> ZSYCON_ROOK: estimates the reciprocal of the condition number (in the + !> 1-norm) of a complex symmetric matrix A using the factorization + !> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zsycon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==czero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==czero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**t) or inv(u*d*u**t). + call stdlib_zsytrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zsycon_rook + + !> ZSYRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is symmetric indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_zsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**t). + call stdlib_zsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zsytrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zsyrfs + + !> ZSYSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_zsysv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N symmetric matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**T (or L**T) is the transpose of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is symmetric and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> ZSYTRF_RK is called to compute the factorization of a complex + !> symmetric matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine ZSYTRS_3. + + pure subroutine stdlib_zsysv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSV_ROOK: computes the solution to a complex system of linear + !> equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is symmetric and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> ZSYTRF_ROOK is called to compute the factorization of a complex + !> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling ZSYTRS_ROOK. + + pure subroutine stdlib_zsysv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYSVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zsysvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: af(ldaf,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlansy( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zsycon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zsytrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zsyrfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZTBCON: estimates the reciprocal of the condition number of a + !> triangular band matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ztbcon( norm, uplo, diag, n, kd, ab, ldab, rcond, work,rwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, n + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: ab(ldab,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab 0. + if( anorm>zero ) then + ! estimate the 1-norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_zlatbs( uplo, 'NO TRANSPOSE', diag, normin, n, kd,ab, ldab, work, & + scale, rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_zlatbs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, kd, ab, ldab,& + work, scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_izamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTFTRI: computes the inverse of a triangular matrix A stored in RFP + !> format. + !> This is a Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_ztftri( transr, uplo, diag, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo, diag + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: a(0:*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( .not.stdlib_lsame( diag, 'N' ) .and. .not.stdlib_lsame( diag, 'U' ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_ztrtri( 'L', diag, n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_ztrmm( 'R', 'L', 'N', diag, n2, n1, -cone, a( 0 ),n, a( n1 ), n ) + + call stdlib_ztrtri( 'U', diag, n2, a( n ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ztrmm( 'L', 'U', 'C', diag, n2, n1, cone, a( n ), n,a( n1 ), n ) + + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_ztrtri( 'L', diag, n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_ztrmm( 'L', 'L', 'C', diag, n1, n2, -cone, a( n2 ),n, a( 0 ), n ) + + call stdlib_ztrtri( 'U', diag, n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ztrmm( 'R', 'U', 'N', diag, n1, n2, cone, a( n1 ),n, a( 0 ), n ) + + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_ztrtri( 'U', diag, n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_ztrmm( 'L', 'U', 'N', diag, n1, n2, -cone, a( 0 ),n1, a( n1*n1 ), & + n1 ) + call stdlib_ztrtri( 'L', diag, n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ztrmm( 'R', 'L', 'C', diag, n1, n2, cone, a( 1 ),n1, a( n1*n1 ), & + n1 ) + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_ztrtri( 'U', diag, n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_ztrmm( 'R', 'U', 'C', diag, n2, n1, -cone,a( n2*n2 ), n2, a( 0 ), & + n2 ) + call stdlib_ztrtri( 'L', diag, n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + if( info>0 )return + call stdlib_ztrmm( 'L', 'L', 'N', diag, n2, n1, cone,a( n1*n2 ), n2, a( 0 ), & + n2 ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_ztrtri( 'L', diag, k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_ztrmm( 'R', 'L', 'N', diag, k, k, -cone, a( 1 ),n+1, a( k+1 ), n+& + 1 ) + call stdlib_ztrtri( 'U', diag, k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ztrmm( 'L', 'U', 'C', diag, k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 & + ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_ztrtri( 'L', diag, k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_ztrmm( 'L', 'L', 'C', diag, k, k, -cone, a( k+1 ),n+1, a( 0 ), n+& + 1 ) + call stdlib_ztrtri( 'U', diag, k, a( k ), n+1, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ztrmm( 'R', 'U', 'N', diag, k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_ztrtri( 'U', diag, k, a( k ), k, info ) + if( info>0 )return + call stdlib_ztrmm( 'L', 'U', 'N', diag, k, k, -cone, a( k ), k,a( k*( k+1 ) ),& + k ) + call stdlib_ztrtri( 'L', diag, k, a( 0 ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ztrmm( 'R', 'L', 'C', diag, k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_ztrtri( 'U', diag, k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_ztrmm( 'R', 'U', 'C', diag, k, k, -cone,a( k*( k+1 ) ), k, a( 0 ),& + k ) + call stdlib_ztrtri( 'L', diag, k, a( k*k ), k, info ) + if( info>0 )info = info + k + if( info>0 )return + call stdlib_ztrmm( 'L', 'L', 'N', diag, k, k, cone, a( k*k ), k,a( 0 ), k ) + + end if + end if + end if + return + end subroutine stdlib_ztftri + + !> ZTGSJA: computes the generalized singular value decomposition (GSVD) + !> of two complex upper triangular (or trapezoidal) matrices A and B. + !> On entry, it is assumed that matrices A and B have the following + !> forms, which may be obtained by the preprocessing subroutine ZGGSVP + !> from a general M-by-N matrix A and P-by-N matrix B: + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L >= 0; + !> L ( 0 0 A23 ) + !> M-K-L ( 0 0 0 ) + !> N-K-L K L + !> A = K ( 0 A12 A13 ) if M-K-L < 0; + !> M-K ( 0 0 A23 ) + !> N-K-L K L + !> B = L ( 0 0 B13 ) + !> P-L ( 0 0 0 ) + !> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular + !> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, + !> otherwise A23 is (M-K)-by-L upper trapezoidal. + !> On exit, + !> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), + !> where U, V and Q are unitary matrices. + !> R is a nonsingular upper triangular matrix, and D1 + !> and D2 are ``diagonal'' matrices, which are of the following + !> structures: + !> If M-K-L >= 0, + !> K L + !> D1 = K ( I 0 ) + !> L ( 0 C ) + !> M-K-L ( 0 0 ) + !> K L + !> D2 = L ( 0 S ) + !> P-L ( 0 0 ) + !> N-K-L K L + !> ( 0 R ) = K ( 0 R11 R12 ) K + !> L ( 0 0 R22 ) L + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), + !> S = diag( BETA(K+1), ... , BETA(K+L) ), + !> C**2 + S**2 = I. + !> R is stored in A(1:K+L,N-K-L+1:N) on exit. + !> If M-K-L < 0, + !> K M-K K+L-M + !> D1 = K ( I 0 0 ) + !> M-K ( 0 C 0 ) + !> K M-K K+L-M + !> D2 = M-K ( 0 S 0 ) + !> K+L-M ( 0 0 I ) + !> P-L ( 0 0 0 ) + !> N-K-L K M-K K+L-M + !> ( 0 R ) = K ( 0 R11 R12 R13 ) + !> M-K ( 0 0 R22 R23 ) + !> K+L-M ( 0 0 0 R33 ) + !> where + !> C = diag( ALPHA(K+1), ... , ALPHA(M) ), + !> S = diag( BETA(K+1), ... , BETA(M) ), + !> C**2 + S**2 = I. + !> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored + !> ( 0 R22 R23 ) + !> in B(M-K+1:L,N+M-K-L+1:N) on exit. + !> The computation of the unitary transformation matrices U, V or Q + !> is optional. These matrices may either be formed explicitly, or they + !> may be postmultiplied into input matrices U1, V1, or Q1. + + pure subroutine stdlib_ztgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & + alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobq, jobu, jobv + integer(ilp), intent(out) :: info, ncycle + integer(ilp), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p + real(dp), intent(in) :: tola, tolb + ! Array Arguments + real(dp), intent(out) :: alpha(*), beta(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: maxit = 40 + real(dp), parameter :: hugenum = huge(zero) + + + + ! Local Scalars + logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv + integer(ilp) :: i, j, kcycle + real(dp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin + complex(dp) :: a2, b2, snq, snu, snv + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min,huge + ! Executable Statements + ! decode and test the input parameters + initu = stdlib_lsame( jobu, 'I' ) + wantu = initu .or. stdlib_lsame( jobu, 'U' ) + initv = stdlib_lsame( jobv, 'I' ) + wantv = initv .or. stdlib_lsame( jobv, 'V' ) + initq = stdlib_lsame( jobq, 'I' ) + wantq = initq .or. stdlib_lsame( jobq, 'Q' ) + info = 0 + if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -1 + else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -2 + else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( p<0 ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda=-hugenum) ) then + if( gamma=beta( k+i ) ) then + call stdlib_zdscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) + else + call stdlib_zdscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) + call stdlib_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + else + alpha( k+i ) = zero + beta( k+i ) = one + call stdlib_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) + end if + end do + ! post-assignment + do i = m + 1, k + l + alpha( i ) = zero + beta( i ) = one + end do + if( k+l ZTGSY2: solves the generalized Sylvester equation + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, + !> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, + !> N-by-N and M-by-N, respectively. A, B, D and E are upper triangular + !> (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output + !> scaling factor chosen to avoid overflow. + !> In matrix notation solving equation (1) corresponds to solve + !> Zx = scale * b, where Z is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Ik is the identity matrix of size k and X**H is the conjuguate transpose of X. + !> kron(X, Y) is the Kronecker product between the matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case is used to compute an estimate of Dif[(A, D), (B, E)] = + !> = sigma_min(Z) using reverse communication with ZLACON. + !> ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL + !> of an upper bound on the separation between to matrix pairs. Then + !> the input (A, D), (B, E) are sub-pencils of two matrix pairs in + !> ZTGSYL. + + pure subroutine stdlib_ztgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, rdsum, rdscal,info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n + integer(ilp), intent(out) :: info + real(dp), intent(inout) :: rdscal, rdsum + real(dp), intent(out) :: scale + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(dp), intent(inout) :: c(ldc,*), f(ldf,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ldz = 2 + + + ! Local Scalars + logical(lk) :: notran + integer(ilp) :: i, ierr, j, k + real(dp) :: scaloc + complex(dp) :: alpha + ! Local Arrays + integer(ilp) :: ipiv(ldz), jpiv(ldz) + complex(dp) :: rhs(ldz), z(ldz,ldz) + ! Intrinsic Functions + intrinsic :: cmplx,conjg,max + ! Executable Statements + ! decode and test input parameters + info = 0 + ierr = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>2 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda0 )info = ierr + if( ijob==0 ) then + call stdlib_zgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1, k ), 1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1, k ), 1 ) + + end do + scale = scale*scaloc + end if + else + call stdlib_zlatdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv ) + + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + if( i>1 ) then + alpha = -rhs( 1 ) + call stdlib_zaxpy( i-1, alpha, a( 1, i ), 1, c( 1, j ), 1 ) + call stdlib_zaxpy( i-1, alpha, d( 1, i ), 1, f( 1, j ), 1 ) + end if + if( j0 )info = ierr + call stdlib_zgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc ) + if( scaloc/=one ) then + do k = 1, n + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1, k ),1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! unpack solution vector(s) + c( i, j ) = rhs( 1 ) + f( i, j ) = rhs( 2 ) + ! substitute r(i, j) and l(i, j) into remaining equation. + do k = 1, j - 1 + f( i, k ) = f( i, k ) + rhs( 1 )*conjg( b( k, j ) ) +rhs( 2 )*conjg( e( k, & + j ) ) + end do + do k = i + 1, m + c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1 ) -conjg( d( i, k ) )& + *rhs( 2 ) + end do + end do loop_70 + end do loop_80 + end if + return + end subroutine stdlib_ztgsy2 + + !> ZTGSYL: solves the generalized Sylvester equation: + !> A * R - L * B = scale * C (1) + !> D * R - L * E = scale * F + !> where R and L are unknown m-by-n matrices, (A, D), (B, E) and + !> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, + !> respectively, with complex entries. A, B, D and E are upper + !> triangular (i.e., (A,D) and (B,E) in generalized Schur form). + !> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 + !> is an output scaling factor chosen to avoid overflow. + !> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z + !> is defined as + !> Z = [ kron(In, A) -kron(B**H, Im) ] (2) + !> [ kron(In, D) -kron(E**H, Im) ], + !> Here Ix is the identity matrix of size x and X**H is the conjugate + !> transpose of X. Kron(X, Y) is the Kronecker product between the + !> matrices X and Y. + !> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b + !> is solved for, which is equivalent to solve for R and L in + !> A**H * R + D**H * L = scale * C (3) + !> R * B**H + L * E**H = scale * -F + !> This case (TRANS = 'C') is used to compute an one-norm-based estimate + !> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) + !> and (B,E), using ZLACON. + !> If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of + !> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the + !> reciprocal of the smallest singular value of Z. + !> This is a level-3 BLAS algorithm. + + pure subroutine stdlib_ztgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, & + ldf, scale, dif, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n + integer(ilp), intent(out) :: info + real(dp), intent(out) :: dif, scale + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + complex(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*) + complex(dp), intent(inout) :: c(ldc,*), f(ldf,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! replaced various illegal calls to stdlib_ccopy by calls to stdlib_claset. + ! sven hammarling, 1/5/02. + + + ! Local Scalars + logical(lk) :: lquery, notran + integer(ilp) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, & + p, pq, q + real(dp) :: dscale, dsum, scale2, scaloc + ! Intrinsic Functions + intrinsic :: real,cmplx,max,sqrt + ! Executable Statements + ! decode and test input parameters + info = 0 + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -1 + else if( notran ) then + if( ( ijob<0 ) .or. ( ijob>4 ) ) then + info = -2 + end if + end if + if( info==0 ) then + if( m<=0 ) then + info = -3 + else if( n<=0 ) then + info = -4 + else if( lda=3 ) then + ifunc = ijob - 2 + call stdlib_zlaset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_zlaset( 'F', m, n, czero, czero, f, ldf ) + else if( ijob>=1 .and. notran ) then + isolve = 2 + end if + end if + if( ( mb<=1 .and. nb<=1 ) .or. ( mb>=m .and. nb>=n ) )then + ! use unblocked level 2 solver + loop_30: do iround = 1, isolve + scale = one + dscale = zero + dsum = one + pq = m*n + call stdlib_ztgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,& + ldf, scale, dsum, dscale,info ) + if( dscale/=zero ) then + if( ijob==1 .or. ijob==3 ) then + dif = sqrt( real( 2*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) ) + else + dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) ) + end if + end if + if( isolve==2 .and. iround==1 ) then + if( notran ) then + ifunc = ijob + end if + scale2 = scale + call stdlib_zlacpy( 'F', m, n, c, ldc, work, m ) + call stdlib_zlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m ) + call stdlib_zlaset( 'F', m, n, czero, czero, c, ldc ) + call stdlib_zlaset( 'F', m, n, czero, czero, f, ldf ) + else if( isolve==2 .and. iround==2 ) then + call stdlib_zlacpy( 'F', m, n, work, m, c, ldc ) + call stdlib_zlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf ) + scale = scale2 + end if + end do loop_30 + return + end if + ! determine block structure of a + p = 0 + i = 1 + 40 continue + if( i>m )go to 50 + p = p + 1 + iwork( p ) = i + i = i + mb + if( i>=m )go to 50 + go to 40 + 50 continue + iwork( p+1 ) = m + 1 + if( iwork( p )==iwork( p+1 ) )p = p - 1 + ! determine block structure of b + q = p + 1 + j = 1 + 60 continue + if( j>n )go to 70 + q = q + 1 + iwork( q ) = j + j = j + nb + if( j>=n )go to 70 + go to 60 + 70 continue + iwork( q+1 ) = n + 1 + if( iwork( q )==iwork( q+1 ) )q = q - 1 + if( notran ) then + loop_150: do iround = 1, isolve + ! solve (i, j) - subsystem + ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j) + ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j) + ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q + pq = 0 + scale = one + dscale = zero + dsum = one + loop_130: do j = p + 2, q + js = iwork( j ) + je = iwork( j+1 ) - 1 + nb = je - js + 1 + loop_120: do i = p, 1, -1 + is = iwork( i ) + ie = iwork( i+1 ) - 1 + mb = ie - is + 1 + call stdlib_ztgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), & + ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, & + scaloc, dsum, dscale,linfo ) + if( linfo>0 )info = linfo + pq = pq + mb*nb + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1, k ), 1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_zscal( is-1, cmplx( scaloc, zero,KIND=dp),c( 1, k ), 1 ) + + call stdlib_zscal( is-1, cmplx( scaloc, zero,KIND=dp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),c( ie+1, k ), & + 1 ) + call stdlib_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),f( ie+1, k ), & + 1 ) + end do + do k = je + 1, n + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1, k ), 1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1, k ), 1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( i>1 ) then + call stdlib_zgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=dp), a(& + 1, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=dp),c( 1, js ), & + ldc ) + call stdlib_zgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=dp), d(& + 1, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=dp),f( 1, js ), & + ldf ) + end if + if( j0 )info = linfo + if( scaloc/=one ) then + do k = 1, js - 1 + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1, k ),1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1, k ),1 ) + + end do + do k = js, je + call stdlib_zscal( is-1, cmplx( scaloc, zero,KIND=dp),c( 1, k ), 1 ) + + call stdlib_zscal( is-1, cmplx( scaloc, zero,KIND=dp),f( 1, k ), 1 ) + + end do + do k = js, je + call stdlib_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),c( ie+1, k ), 1 ) + + call stdlib_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),f( ie+1, k ), 1 ) + + end do + do k = je + 1, n + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1, k ),1 ) + + call stdlib_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1, k ),1 ) + + end do + scale = scale*scaloc + end if + ! substitute r(i,j) and l(i,j) into remaining equation. + if( j>p+2 ) then + call stdlib_zgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=dp), c( is,& + js ), ldc,b( 1, js ), ldb, cmplx( one, zero,KIND=dp),f( is, 1 ), ldf ) + + call stdlib_zgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=dp), f( is,& + js ), ldf,e( 1, js ), lde, cmplx( one, zero,KIND=dp),f( is, 1 ), ldf ) + + end if + if( i

ZTPCON: estimates the reciprocal of the condition number of a packed + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ztpcon( norm, uplo, diag, n, ap, rcond, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZTPCON', -info ) + return + end if + ! quick return if possible + if( n==0 ) then + rcond = one + return + end if + rcond = zero + smlnum = stdlib_dlamch( 'SAFE MINIMUM' )*real( max( 1, n ),KIND=dp) + ! compute the norm of the triangular matrix a. + anorm = stdlib_zlantp( norm, uplo, diag, n, ap, rwork ) + ! continue only if anorm > 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_zlatps( uplo, 'NO TRANSPOSE', diag, normin, n, ap,work, scale, & + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_zlatps( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, ap, work, & + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_izamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTPLQT: computes a blocked LQ factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_ztplqt( m, n, l, mb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, mb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, nb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( mb<1 .or. (mb>m .and. m>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ztplqt2( ib, nb, lb, a(i,i), lda, b( i, 1 ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**t to b(i+ib:m,:) from the right + if( i+ib<=m ) then + call stdlib_ztprfb( 'R', 'N', 'F', 'R', m-i-ib+1, nb, ib, lb,b( i, 1 ), ldb, t( & + 1, i ), ldt,a( i+ib, i ), lda, b( i+ib, 1 ), ldb,work, m-i-ib+1) + end if + end do + return + end subroutine stdlib_ztplqt + + !> ZTPMLQT: applies a complex unitary matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_ztpmlqt( side, trans, m, n, k, l, mb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, mb, ldt + ! Array Arguments + complex(dp), intent(in) :: v(ldv,*), t(ldt,*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, nb, lb, kf, ldaq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldaq = max( 1, k ) + else if ( right ) then + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( mb<1 .or. (mb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_ztprfb( 'L', 'C', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + do i = 1, k, mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ztprfb( 'R', 'N', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. tran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = 0 + end if + call stdlib_ztprfb( 'L', 'N', 'F', 'R', nb, n, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + kf = ((k-1)/mb)*mb+1 + do i = kf, 1, -mb + ib = min( mb, k-i+1 ) + nb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = nb-n+l-i+1 + end if + call stdlib_ztprfb( 'R', 'C', 'F', 'R', m, nb, ib, lb,v( i, 1 ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_ztpmlqt + + !> ZTPMQRT: applies a complex orthogonal matrix Q obtained from a + !> "triangular-pentagonal" complex block reflector H to a general + !> complex matrix C, which consists of two blocks A and B. + + pure subroutine stdlib_ztpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, & + work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt + ! Array Arguments + complex(dp), intent(in) :: v(ldv,*), t(ldt,*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran + integer(ilp) :: i, ib, mb, lb, kf, ldaq, ldvq + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! Test The Input Arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + tran = stdlib_lsame( trans, 'C' ) + notran = stdlib_lsame( trans, 'N' ) + if ( left ) then + ldvq = max( 1, m ) + ldaq = max( 1, k ) + else if ( right ) then + ldvq = max( 1, n ) + ldaq = max( 1, m ) + end if + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 ) then + info = -5 + else if( l<0 .or. l>k ) then + info = -6 + else if( nb<1 .or. (nb>k .and. k>0) ) then + info = -7 + else if( ldv=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ztprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. notran ) then + do i = 1, k, nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_ztprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + else if( left .and. notran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( m-l+i+ib-1, m ) + if( i>=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ztprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( i, 1 ), lda, b, ldb, work, ib ) + end do + else if( right .and. tran ) then + kf = ((k-1)/nb)*nb+1 + do i = kf, 1, -nb + ib = min( nb, k-i+1 ) + mb = min( n-l+i+ib-1, n ) + if( i>=l ) then + lb = 0 + else + lb = mb-n+l-i+1 + end if + call stdlib_ztprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1, i ), ldv, t( 1, i ), & + ldt,a( 1, i ), lda, b, ldb, work, m ) + end do + end if + return + end subroutine stdlib_ztpmqrt + + !> ZTPQRT: computes a blocked QR factorization of a complex + !> "triangular-pentagonal" matrix C, which is composed of a + !> triangular block A and pentagonal block B, using the compact + !> WY representation for Q. + + pure subroutine stdlib_ztpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldt, n, m, l, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, lb, mb, iinfo + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( l<0 .or. (l>min(m,n) .and. min(m,n)>=0)) then + info = -3 + else if( nb<1 .or. (nb>n .and. n>0)) then + info = -4 + else if( lda=l ) then + lb = 0 + else + lb = mb-m+l-i+1 + end if + call stdlib_ztpqrt2( mb, ib, lb, a(i,i), lda, b( 1, i ), ldb,t(1, i ), ldt, iinfo ) + + ! update by applying h**h to b(:,i+ib:n) from the left + if( i+ib<=n ) then + call stdlib_ztprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1, i ), ldb, t( & + 1, i ), ldt,a( i, i+ib ), lda, b( 1, i+ib ), ldb,work, ib ) + end if + end do + return + end subroutine stdlib_ztpqrt + + !> ZTRCON: estimates the reciprocal of the condition number of a + !> triangular matrix A, in either the 1-norm or the infinity-norm. + !> The norm of A is computed and an estimate is obtained for + !> norm(inv(A)), then the reciprocal of the condition number is + !> computed as + !> RCOND = 1 / ( norm(A) * norm(inv(A)) ). + + subroutine stdlib_ztrcon( norm, uplo, diag, n, a, lda, rcond, work,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: diag, norm, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nounit, onenrm, upper + character :: normin + integer(ilp) :: ix, kase, kase1 + real(dp) :: ainvnm, anorm, scale, smlnum, xnorm + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + nounit = stdlib_lsame( diag, 'N' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.nounit .and. .not.stdlib_lsame( diag, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda 0. + if( anorm>zero ) then + ! estimate the norm of the inverse of a. + ainvnm = zero + normin = 'N' + if( onenrm ) then + kase1 = 1 + else + kase1 = 2 + end if + kase = 0 + 10 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + if( kase==kase1 ) then + ! multiply by inv(a). + call stdlib_zlatrs( uplo, 'NO TRANSPOSE', diag, normin, n, a,lda, work, scale,& + rwork, info ) + else + ! multiply by inv(a**h). + call stdlib_zlatrs( uplo, 'CONJUGATE TRANSPOSE', diag, normin,n, a, lda, work,& + scale, rwork, info ) + end if + normin = 'Y' + ! multiply by 1/scale if doing so will not cause overflow. + if( scale/=one ) then + ix = stdlib_izamax( n, work, 1 ) + xnorm = cabs1( work( ix ) ) + if( scale ZTRSYL: solves the complex Sylvester matrix equation: + !> op(A)*X + X*op(B) = scale*C or + !> op(A)*X - X*op(B) = scale*C, + !> where op(A) = A or A**H, and A and B are both upper triangular. A is + !> M-by-M and B is N-by-N; the right hand side C and the solution X are + !> M-by-N; and scale is an output scale factor, set <= 1 to avoid + !> overflow in X. + + subroutine stdlib_ztrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb, c,ldc, scale, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trana, tranb + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: isgn, lda, ldb, ldc, m, n + real(dp), intent(out) :: scale + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: notrna, notrnb + integer(ilp) :: j, k, l + real(dp) :: bignum, da11, db, eps, scaloc, sgn, smin, smlnum + complex(dp) :: a11, suml, sumr, vec, x11 + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min + ! Executable Statements + ! decode and test input parameters + notrna = stdlib_lsame( trana, 'N' ) + notrnb = stdlib_lsame( tranb, 'N' ) + info = 0 + if( .not.notrna .and. .not.stdlib_lsame( trana, 'C' ) ) then + info = -1 + else if( .not.notrnb .and. .not.stdlib_lsame( tranb, 'C' ) ) then + info = -2 + else if( isgn/=1 .and. isgn/=-1 ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldaone ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_zladiv( vec*cmplx( scaloc,KIND=dp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_zdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_30 + else if( .not.notrna .and. notrnb ) then + ! solve a**h *x + isgn*x*b = scale*c. + ! the (k,l)th block of x is determined starting from + ! upper-left corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 l-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + isgn*sum [x(k,j)*b(j,l)] + ! i=1 j=1 + loop_60: do l = 1, n + do k = 1, m + suml = stdlib_zdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_zdotu( l-1, c( k, 1 ), ldc, b( 1, l ), 1 ) + vec = c( k, l ) - ( suml+sgn*sumr ) + scaloc = one + a11 = conjg( a( k, k ) ) + sgn*b( l, l ) + da11 = abs( real( a11,KIND=dp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=dp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_zladiv( vec*cmplx( scaloc,KIND=dp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_zdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_60 + else if( .not.notrna .and. .not.notrnb ) then + ! solve a**h*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! upper-right corner column by column by + ! a**h(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! k-1 + ! r(k,l) = sum [a**h(i,k)*x(i,l)] + + ! i=1 + ! n + ! isgn*sum [x(k,j)*b**h(l,j)]. + ! j=l+1 + loop_90: do l = n, 1, -1 + do k = 1, m + suml = stdlib_zdotc( k-1, a( 1, k ), 1, c( 1, l ), 1 ) + sumr = stdlib_zdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = conjg( a( k, k )+sgn*b( l, l ) ) + da11 = abs( real( a11,KIND=dp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=dp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_zladiv( vec*cmplx( scaloc,KIND=dp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_zdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_90 + else if( notrna .and. .not.notrnb ) then + ! solve a*x + isgn*x*b**h = c. + ! the (k,l)th block of x is determined starting from + ! bottom-left corner column by column by + ! a(k,k)*x(k,l) + isgn*x(k,l)*b**h(l,l) = c(k,l) - r(k,l) + ! where + ! m n + ! r(k,l) = sum [a(k,i)*x(i,l)] + isgn*sum [x(k,j)*b**h(l,j)] + ! i=k+1 j=l+1 + loop_120: do l = n, 1, -1 + do k = m, 1, -1 + suml = stdlib_zdotu( m-k, a( k, min( k+1, m ) ), lda,c( min( k+1, m ), l ), 1 & + ) + sumr = stdlib_zdotc( n-l, c( k, min( l+1, n ) ), ldc,b( l, min( l+1, n ) ), & + ldb ) + vec = c( k, l ) - ( suml+sgn*conjg( sumr ) ) + scaloc = one + a11 = a( k, k ) + sgn*conjg( b( l, l ) ) + da11 = abs( real( a11,KIND=dp) ) + abs( aimag( a11 ) ) + if( da11<=smin ) then + a11 = smin + da11 = smin + info = 1 + end if + db = abs( real( vec,KIND=dp) ) + abs( aimag( vec ) ) + if( da11one ) then + if( db>bignum*da11 )scaloc = one / db + end if + x11 = stdlib_zladiv( vec*cmplx( scaloc,KIND=dp), a11 ) + if( scaloc/=one ) then + do j = 1, n + call stdlib_zdscal( m, scaloc, c( 1, j ), 1 ) + end do + scale = scale*scaloc + end if + c( k, l ) = x11 + end do + end do loop_120 + end if + return + end subroutine stdlib_ztrsyl + + !> ZUNBDB5: orthogonalizes the column vector + !> X = [ X1 ] + !> [ X2 ] + !> with respect to the columns of + !> Q = [ Q1 ] . + !> [ Q2 ] + !> The columns of Q must be orthonormal. + !> If the projection is zero according to Kahan's "twice is enough" + !> criterion, then some other vector from the orthogonal complement + !> is returned. This vector is chosen in an arbitrary but deterministic + !> way. + + pure subroutine stdlib_zunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(in) :: q1(ldq1,*), q2(ldq2,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x1(*), x2(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, j + ! Intrinsic Function + intrinsic :: max + ! Executable Statements + ! test input arguments + info = 0 + if( m1 < 0 ) then + info = -1 + else if( m2 < 0 ) then + info = -2 + else if( n < 0 ) then + info = -3 + else if( incx1 < 1 ) then + info = -5 + else if( incx2 < 1 ) then + info = -7 + else if( ldq1 < max( 1, m1 ) ) then + info = -9 + else if( ldq2 < max( 1, m2 ) ) then + info = -11 + else if( lwork < n ) then + info = -13 + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB5', -info ) + return + end if + ! project x onto the orthogonal complement of q + call stdlib_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & + childinfo ) + ! if the projection is nonzero, then return + if( stdlib_dznrm2(m1,x1,incx1) /= czero.or. stdlib_dznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + ! project each standard basis vector e_1,...,e_m1 in turn, stopping + ! when a nonzero projection is found + do i = 1, m1 + do j = 1, m1 + x1(j) = czero + end do + x1(i) = cone + do j = 1, m2 + x2(j) = czero + end do + call stdlib_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_dznrm2(m1,x1,incx1) /= czero.or. stdlib_dznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, + ! stopping when a nonzero projection is found + do i = 1, m2 + do j = 1, m1 + x1(j) = czero + end do + do j = 1, m2 + x2(j) = czero + end do + x2(i) = cone + call stdlib_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & + lwork, childinfo ) + if( stdlib_dznrm2(m1,x1,incx1) /= czero.or. stdlib_dznrm2(m2,x2,incx2) /= czero ) & + then + return + end if + end do + return + end subroutine stdlib_zunbdb5 + + !> ZUNCSD: computes the CS decomposition of an M-by-M partitioned + !> unitary matrix X: + !> [ I 0 0 | 0 0 0 ] + !> [ 0 C 0 | 0 -S 0 ] + !> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H + !> X = [-----------] = [---------] [---------------------] [---------] . + !> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] + !> [ 0 S 0 | 0 C 0 ] + !> [ 0 0 I | 0 0 0 ] + !> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, + !> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are + !> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in + !> which R = MIN(P,M-P,Q,M-Q). + + recursive subroutine stdlib_zuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & + ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & + work, lwork, rwork, lrwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & + lrwork, lwork, m, p, q + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: theta(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) + + complex(dp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) + + ! =================================================================== + + ! Local Scalars + character :: transt, signst + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & + lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & + lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & + lworkopt, p1, q1 + logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t + integer(ilp) :: lrworkmin, lrworkopt + logical(lk) :: lrquery + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + wantv2t = stdlib_lsame( jobv2t, 'Y' ) + colmajor = .not. stdlib_lsame( trans, 'T' ) + defaultsigns = .not. stdlib_lsame( signs, 'O' ) + lquery = lwork == -1 + lrquery = lrwork == -1 + if( m < 0 ) then + info = -7 + else if( p < 0 .or. p > m ) then + info = -8 + else if( q < 0 .or. q > m ) then + info = -9 + else if ( colmajor .and. ldx11 < max( 1, p ) ) then + info = -11 + else if (.not. colmajor .and. ldx11 < max( 1, q ) ) then + info = -11 + else if (colmajor .and. ldx12 < max( 1, p ) ) then + info = -13 + else if (.not. colmajor .and. ldx12 < max( 1, m-q ) ) then + info = -13 + else if (colmajor .and. ldx21 < max( 1, m-p ) ) then + info = -15 + else if (.not. colmajor .and. ldx21 < max( 1, q ) ) then + info = -15 + else if (colmajor .and. ldx22 < max( 1, m-p ) ) then + info = -17 + else if (.not. colmajor .and. ldx22 < max( 1, m-q ) ) then + info = -17 + else if( wantu1 .and. ldu1 < p ) then + info = -20 + else if( wantu2 .and. ldu2 < m-p ) then + info = -22 + else if( wantv1t .and. ldv1t < q ) then + info = -24 + else if( wantv2t .and. ldv2t < m-q ) then + info = -26 + end if + ! work with transpose if convenient + if( info == 0 .and. min( p, m-p ) < min( q, m-q ) ) then + if( colmajor ) then + transt = 'T' + else + transt = 'N' + end if + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_zuncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & + ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& + u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) + return + end if + ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if + ! convenient + if( info == 0 .and. m-q < q ) then + if( defaultsigns ) then + signst = 'O' + else + signst = 'D' + end if + call stdlib_zuncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & + ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & + v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) + return + end if + ! compute workspace + if( info == 0 ) then + ! real workspace + iphi = 2 + ib11d = iphi + max( 1, q - 1 ) + ib11e = ib11d + max( 1, q ) + ib12d = ib11e + max( 1, q - 1 ) + ib12e = ib12d + max( 1, q ) + ib21d = ib12e + max( 1, q - 1 ) + ib21e = ib21d + max( 1, q ) + ib22d = ib21e + max( 1, q - 1 ) + ib22e = ib22d + max( 1, q ) + ibbcsd = ib22e + max( 1, q - 1 ) + call stdlib_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & + ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & + theta, theta, rwork, -1, childinfo ) + lbbcsdworkopt = int( rwork(1),KIND=ilp) + lbbcsdworkmin = lbbcsdworkopt + lrworkopt = ibbcsd + lbbcsdworkopt - 1 + lrworkmin = ibbcsd + lbbcsdworkmin - 1 + rwork(1) = lrworkopt + ! complex workspace + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m - p ) + itauq2 = itauq1 + max( 1, q ) + iorgqr = itauq2 + max( 1, m - q ) + call stdlib_zungqr( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorgqrworkopt = int( work(1),KIND=ilp) + lorgqrworkmin = max( 1, m - q ) + iorglq = itauq2 + max( 1, m - q ) + call stdlib_zunglq( m-q, m-q, m-q, u1, max(1,m-q), u1, work, -1,childinfo ) + lorglqworkopt = int( work(1),KIND=ilp) + lorglqworkmin = max( 1, m - q ) + iorbdb = itauq2 + max( 1, m - q ) + call stdlib_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & + ldx22, theta, theta, u1, u2,v1t, v2t, work, -1, childinfo ) + lorbdbworkopt = int( work(1),KIND=ilp) + lorbdbworkmin = lorbdbworkopt + lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & + lorbdbworkopt ) - 1 + lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & + lorbdbworkmin ) - 1 + work(1) = max(lworkopt,lworkmin) + if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -22 + else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then + info = -24 + else + lorgqrwork = lwork - iorgqr + 1 + lorglqwork = lwork - iorglq + 1 + lorbdbwork = lwork - iorbdb + 1 + lbbcsdwork = lrwork - ibbcsd + 1 + end if + end if + ! abort if any illegal arguments + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNCSD', -info ) + return + else if( lquery .or. lrquery ) then + return + end if + ! transform to bidiagonal block form + call stdlib_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & + ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& + iorbdb), lorbdbwork, childinfo ) + ! accumulate householder reflectors + if( colmajor ) then + if( wantu1 .and. p > 0 ) then + call stdlib_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zlacpy( 'U', q-1, q-1, x11(1,2), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_zunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglqwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + call stdlib_zlacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) + if( m-p > q) then + call stdlib_zlacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & + ldv2t ) + end if + if( m > q ) then + call stdlib_zunglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & + lorglqwork, info ) + end if + end if + else + if( wantu1 .and. p > 0 ) then + call stdlib_zlacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) + call stdlib_zunglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & + info) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zlacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) + call stdlib_zunglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& + info ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zlacpy( 'L', q-1, q-1, x11(2,1), ldx11, v1t(2,2),ldv1t ) + v1t(1, 1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_zungqr( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorgqr), & + lorgqrwork, info ) + end if + if( wantv2t .and. m-q > 0 ) then + p1 = min( p+1, m ) + q1 = min( q+1, m ) + call stdlib_zlacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) + if( m > p+q ) then + call stdlib_zlacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) + + end if + call stdlib_zungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & + lorgqrwork, info ) + end if + end if + ! compute the csd of the matrix in bidiagonal-block form + call stdlib_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & + u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& + rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& + lbbcsdwork, info ) + ! permute rows and columns to place identity submatrices in top- + ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- + ! block and/or bottom-right corner of (2,1)-block and/or top-left + ! corner of (2,2)-block + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + if( colmajor ) then + call stdlib_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + else + call stdlib_zlapmr( .false., m-p, m-p, u2, ldu2, iwork ) + end if + end if + if( m > 0 .and. wantv2t ) then + do i = 1, p + iwork(i) = m - p - q + i + end do + do i = p + 1, m - q + iwork(i) = i - p + end do + if( .not. colmajor ) then + call stdlib_zlapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) + else + call stdlib_zlapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) + end if + end if + return + ! end stdlib_zuncsd + end subroutine stdlib_zuncsd + + !> ZUNGHR: generates a complex unitary matrix Q which is defined as the + !> product of IHI-ILO elementary reflectors of order N, as returned by + !> ZGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_zunghr( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lwkopt, nb, nh + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda0 ) then + ! generate q(ilo+1:ihi,ilo+1:ihi) + call stdlib_zungqr( nh, nh, nh, a( ilo+1, ilo+1 ), lda, tau( ilo ),work, lwork, & + iinfo ) + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunghr + + !> ZUNGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors of order N, as returned by + !> ZHETRD: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_zungtr( uplo, n, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: i, iinfo, j, lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 ) then + ! generate q(2:n,2:n) + call stdlib_zungqr( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zungtr + + !> ZUNHR_COL: takes an M-by-N complex matrix Q_in with orthonormal columns + !> as input, stored in A, and performs Householder Reconstruction (HR), + !> i.e. reconstructs Householder vectors V(i) implicitly representing + !> another M-by-N matrix Q_out, with the property that Q_in = Q_out*S, + !> where S is an N-by-N diagonal matrix with diagonal entries + !> equal to +1 or -1. The Householder vectors (columns V(i) of V) are + !> stored in A on output, and the diagonal entries of S are stored in D. + !> Block reflectors are also returned in T + !> (same output format as ZGEQRT). + + pure subroutine stdlib_zunhr_col( m, n, nb, a, lda, t, ldt, d, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: d(*), t(ldt,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. n>m ) then + info = -2 + else if( nb<1 ) then + info = -3 + else if( ldan ) then + call stdlib_ztrsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1 ), lda ) + + end if + ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n) + ! as a sequence of upper-triangular blocks with nb-size column + ! blocking. + ! loop over the column blocks of size nb of the array a(1:m,1:n) + ! and the array t(1:nb,1:n), jb is the column index of a column + ! block, jnb is the column block size at each step jb. + nplusone = n + 1 + do jb = 1, n, nb + ! (2-0) determine the column block size jnb. + jnb = min( nplusone-jb, nb ) + ! (2-1) copy the upper-triangular part of the current jnb-by-jnb + ! diagonal block u(jb) (of the n-by-n matrix u) stored + ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part + ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1) + ! column-by-column, total jnb*(jnb+1)/2 elements. + jbtemp1 = jb - 1 + do j = jb, jb+jnb-1 + call stdlib_zcopy( j-jbtemp1, a( jb, j ), 1, t( 1, j ), 1 ) + end do + ! (2-2) perform on the upper-triangular part of the current + ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored + ! in t(1:jnb,jb:jb+jnb-1) the following operation in place: + ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper- + ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication + ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb + ! diagonal block s(jb) of the n-by-n sign matrix s from the + ! right means changing the sign of each j-th column of the block + ! u(jb) according to the sign of the diagonal element of the block + ! s(jb), i.e. s(j,j) that is stored in the array element d(j). + do j = jb, jb+jnb-1 + if( d( j )==cone ) then + call stdlib_zscal( j-jbtemp1, -cone, t( 1, j ), 1 ) + end if + end do + ! (2-3) perform the triangular solve for the current block + ! matrix x(jb): + ! x(jb) * (a(jb)**t) = b(jb), where: + ! a(jb)**t is a jnb-by-jnb unit upper-triangular + ! coefficient block, and a(jb)=v1(jb), which + ! is a jnb-by-jnb unit lower-triangular block + ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1). + ! the n-by-n matrix v1 is the upper part + ! of the m-by-n lower-trapezoidal matrix v + ! stored in a(1:m,1:n); + ! b(jb) is a jnb-by-jnb upper-triangular right-hand + ! side block, b(jb) = (-1)*u(jb)*s(jb), and + ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1); + ! x(jb) is a jnb-by-jnb upper-triangular solution + ! block, x(jb) is the upper-triangular block + ! reflector t(jb), and x(jb) is stored + ! in t(1:jnb,jb:jb+jnb-1). + ! in other words, we perform the triangular solve for the + ! upper-triangular block t(jb): + ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb). + ! even though the blocks x(jb) and b(jb) are upper- + ! triangular, the routine stdlib_ztrsm will access all jnb**2 + ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore, + ! we need to set to zero the elements of the block + ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call + ! to stdlib_ztrsm. + ! (2-3a) set the elements to zero. + jbtemp2 = jb - 2 + do j = jb, jb+jnb-2 + do i = j-jbtemp2, nb + t( i, j ) = czero + end do + end do + ! (2-3b) perform the triangular solve. + call stdlib_ztrsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1, jb ), & + ldt ) + end do + return + end subroutine stdlib_zunhr_col + + !> ZUNMHR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> IHI-ILO elementary reflectors, as returned by ZGEHRD: + !> Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + pure subroutine stdlib_zunmhr( side, trans, m, n, ilo, ihi, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(in) :: ihi, ilo, lda, ldc, lwork, m, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, nh, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + nh = ihi - ilo + left = stdlib_lsame( side, 'L' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 .or. ilo>max( 1, nq ) ) then + info = -5 + else if( ihinq ) then + info = -6 + else if( lda ZUNMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by ZHETRD: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_zunmtr( side, uplo, trans, m, n, a, lda, tau, c, ldc,work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, lquery, upper + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + ! nq is the order of q and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'C' ) )& + then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda ZUPGTR: generates a complex unitary matrix Q which is defined as the + !> product of n-1 elementary reflectors H(i) of order n, as returned by + !> ZHPTRD using packed storage: + !> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), + !> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). + + pure subroutine stdlib_zupgtr( uplo, n, ap, tau, q, ldq, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, n + ! Array Arguments + complex(dp), intent(in) :: ap(*), tau(*) + complex(dp), intent(out) :: q(ldq,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, iinfo, ij, j + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input arguments + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ldq1 ) then + ! generate q(2:n,2:n) + call stdlib_zung2r( n-1, n-1, n-1, q( 2, 2 ), ldq, tau, work,iinfo ) + end if + end if + return + end subroutine stdlib_zupgtr + + !> ZUPMTR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix of order nq, with nq = m if + !> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + !> nq-1 elementary reflectors, as returned by ZHPTRD using packed + !> storage: + !> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + !> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + pure subroutine stdlib_zupmtr( side, uplo, trans, m, n, ap, tau, c, ldc, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldc, m, n + ! Array Arguments + complex(dp), intent(inout) :: ap(*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: forwrd, left, notran, upper + integer(ilp) :: i, i1, i2, i3, ic, ii, jc, mi, ni, nq + complex(dp) :: aii, taui + ! Intrinsic Functions + intrinsic :: conjg,max + ! Executable Statements + ! test the input arguments + info = 0 + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + upper = stdlib_lsame( uplo, 'U' ) + ! nq is the order of q + if( left ) then + nq = m + else + nq = n + end if + if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldc ZCPOSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> ZCPOSV first attempts to factorize the matrix in COMPLEX and use this + !> factorization within an iterative refinement procedure to produce a + !> solution with COMPLEX*16 normwise backward error quality (see below). + !> If the approach fails the method switches to a COMPLEX*16 + !> factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio COMPLEX performance over COMPLEX*16 performance is too + !> small. A reasonable strategy should take the number of right-hand + !> sides and the size of the matrix into account. This might be done + !> with a call to ILAENV in the future. Up to now, we always try + !> iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_zcposv( uplo, n, nrhs, a, lda, b, ldb, x, ldx, work,swork, rwork, iter, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(sp), intent(out) :: swork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: b(ldb,*) + complex(dp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(dp), parameter :: bwdmax = 1.0e+00_dp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(dp) :: anrm, cte, eps, rnrm, xnrm + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from double precision to single precision + ! and store the result in sx. + call stdlib_zlag2c( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_cpotrs( uplo, n, nrhs, swork( ptsa ), n, swork( ptsx ), n,info ) + ! convert sx back to double precision and update the current + ! iterate. + call stdlib_clag2z( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_zaxpy( n, cone, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_zlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_zhemm( 'L', uplo, n, nrhs, cnegone, a, lda, x, ldx, cone,work, n ) + + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = cabs1( x( stdlib_izamax( n, x( 1, i ), 1 ), i ) ) + rnrm = cabs1( work( stdlib_izamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the + ! stopping criterion, set up the iter flag accordingly and follow + ! up on double precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to double precision. + call stdlib_zpotrf( uplo, n, a, lda, info ) + if( info/=0 )return + call stdlib_zlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zpotrs( uplo, n, nrhs, a, lda, x, ldx, info ) + return + end subroutine stdlib_zcposv + + !> ZGBBRD: reduces a complex general m-by-n band matrix A to real upper + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> The routine computes B, and optionally forms Q or P**H, or computes + !> Q**H*C for a given matrix C. + + pure subroutine stdlib_zgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & + ldc, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc + ! Array Arguments + real(dp), intent(out) :: d(*), e(*), rwork(*) + complex(dp), intent(inout) :: ab(ldab,*), c(ldc,*) + complex(dp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: wantb, wantc, wantpt, wantq + integer(ilp) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& + mu0, nr, nrt + real(dp) :: abst, rc + complex(dp) :: ra, rb, rs, t + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min + ! Executable Statements + ! test the input parameters + wantb = stdlib_lsame( vect, 'B' ) + wantq = stdlib_lsame( vect, 'Q' ) .or. wantb + wantpt = stdlib_lsame( vect, 'P' ) .or. wantb + wantc = ncc>0 + klu1 = kl + ku + 1 + info = 0 + if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ncc<0 ) then + info = -4 + else if( kl<0 ) then + info = -5 + else if( ku<0 ) then + info = -6 + else if( ldab1 ) then + ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce + ! first to lower bidiagonal form and then transform to upper + ! bidiagonal + if( ku>0 ) then + ml0 = 1 + mu0 = 2 + else + ml0 = 2 + mu0 = 1 + end if + ! wherever possible, plane rotations are generated and applied in + ! vector operations of length nr over the index set j1:j2:klu1. + ! the complex sines of the plane rotations are stored in work, + ! and the real cosines in rwork. + klm = min( m-1, kl ) + kun = min( n-1, ku ) + kb = klm + kun + kb1 = kb + 1 + inca = kb1*ldab + nr = 0 + j1 = klm + 2 + j2 = 1 - kun + loop_90: do i = 1, minmn + ! reduce i-th column and i-th row of matrix to bidiagonal form + ml = klm + 1 + mu = kun + 1 + loop_80: do kk = 1, kb + j1 = j1 + kb + j2 = j2 + kb + ! generate plane rotations to annihilate nonzero elements + ! which have been created below the band + if( nr>0 )call stdlib_zlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & + rwork( j1 ), kb1 ) + ! apply plane rotations from the left + do l = 1, kb + if( j2-klm+l-1>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_zlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & + klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) + end do + if( ml>ml0 ) then + if( ml<=m-i+1 ) then + ! generate plane rotation to annihilate a(i+ml-1,i) + ! within the band, and apply rotation from the left + call stdlib_zlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & + work( i+ml-1 ), ra ) + ab( ku+ml-1, i ) = ra + if( in ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j-1,j+ku) above the band + ! and store it in work(n+1:2*n) + work( j+kun ) = work( j )*ab( 1, j+kun ) + ab( 1, j+kun ) = rwork( j )*ab( 1, j+kun ) + end do + ! generate plane rotations to annihilate nonzero elements + ! which have been generated above the band + if( nr>0 )call stdlib_zlargv( nr, ab( 1, j1+kun-1 ), inca,work( j1+kun ), kb1,& + rwork( j1+kun ),kb1 ) + ! apply plane rotations from the right + do l = 1, kb + if( j2+l-1>m ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_zlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& + kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) + end do + if( ml==ml0 .and. mu>mu0 ) then + if( mu<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+mu-1) + ! within the band, and apply rotation from the right + call stdlib_zlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & + i+mu-1 ), work( i+mu-1 ), ra ) + ab( ku-mu+3, i+mu-2 ) = ra + call stdlib_zrot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1,ab( ku-& + mu+3, i+mu-1 ), 1,rwork( i+mu-1 ), work( i+mu-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kb1 + end if + if( wantpt ) then + ! accumulate product of plane rotations in p**h + do j = j1, j2, kb1 + call stdlib_zrot( n, pt( j+kun-1, 1 ), ldpt,pt( j+kun, 1 ), ldpt, rwork(& + j+kun ),conjg( work( j+kun ) ) ) + end do + end if + if( j2+kb>m ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kb1 + end if + do j = j1, j2, kb1 + ! create nonzero element a(j+kl+ku,j+ku-1) below the + ! band and store it in work(1:n) + work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) + ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) + end do + if( ml>ml0 ) then + ml = ml - 1 + else + mu = mu - 1 + end if + end do loop_80 + end do loop_90 + end if + if( ku==0 .and. kl>0 ) then + ! a has been reduced to complex lower bidiagonal form + ! transform lower bidiagonal form to upper bidiagonal by applying + ! plane rotations from the left, overwriting superdiagonal + ! elements on subdiagonal elements + do i = 1, min( m-1, n ) + call stdlib_zlartg( ab( 1, i ), ab( 2, i ), rc, rs, ra ) + ab( 1, i ) = ra + if( i0 .and. m1 ) then + rb = -conjg( rs )*ab( ku, i ) + ab( ku, i ) = rc*ab( ku, i ) + end if + if( wantpt )call stdlib_zrot( n, pt( i, 1 ), ldpt, pt( m+1, 1 ), ldpt,rc, & + conjg( rs ) ) + end do + end if + end if + ! make diagonal and superdiagonal elements real, storing them in d + ! and e + t = ab( ku+1, 1 ) + loop_120: do i = 1, minmn + abst = abs( t ) + d( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( wantq )call stdlib_zscal( m, t, q( 1, i ), 1 ) + if( wantc )call stdlib_zscal( ncc, conjg( t ), c( i, 1 ), ldc ) + if( i ZGBRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is banded, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_zgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, kk, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kl<0 ) then + info = -3 + else if( ku<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldabsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zgbtrs( trans, n, kl, ku, 1, afb, ldafb, ipiv, work, n,info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_zgbtrs( transt, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zgbtrs( transn, n, kl, ku, 1, afb, ldafb, ipiv,work, n, info ) + + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zgbrfs + + !> ZGBSV: computes the solution to a complex system of linear equations + !> A * X = B, where A is a band matrix of order N with KL subdiagonals + !> and KU superdiagonals, and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as A = L * U, where L is a product of permutation + !> and unit lower triangular matrices with KL subdiagonals, and U is + !> upper triangular with KL+KU superdiagonals. The factored form of A + !> is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_zgbsv( n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( kl<0 ) then + info = -2 + else if( ku<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab<2*kl+ku+1 ) then + info = -6 + else if( ldb ZGBSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a band matrix of order N with KL subdiagonals and KU + !> superdiagonals, and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zgbsvx( fact, trans, n, kl, ku, nrhs, ab, ldab, afb,ldafb, ipiv, equed, r, & + c, b, ldb, x, ldx,rcond, ferr, berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kl, ku, ldab, ldafb, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(inout) :: c(*), r(*) + complex(dp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + ! moved setting of info = n+1 so info does not subsequently get + ! overwritten. sven, 17 mar 05. + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j, j1, j2 + real(dp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: abs,max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kl<0 ) then + info = -4 + else if( ku<0 ) then + info = -5 + else if( nrhs<0 ) then + info = -6 + else if( ldab0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -14 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + anorm = zero + do j = 1, info + do i = max( ku+2-j, 1 ), min( n+ku+1-j, kl+ku+1 ) + anorm = max( anorm, abs( ab( i, j ) ) ) + end do + end do + rpvgrw = stdlib_zlantb( 'M', 'U', 'N', info, min( info-1, kl+ku ),afb( max( 1, & + kl+ku+2-info ), 1 ), ldafb,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = anorm / rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_zlangb( norm, n, kl, ku, ab, ldab, rwork ) + rpvgrw = stdlib_zlantb( 'M', 'U', 'N', n, kl+ku, afb, ldafb, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_zlangb( 'M', n, kl, ku, ab, ldab, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_zgbcon( norm, n, kl, ku, afb, ldafb, ipiv, anorm, rcond,work, rwork, info ) + + ! compute the solution matrix x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zgbtrs( trans, n, kl, ku, nrhs, afb, ldafb, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_zgbrfs( trans, n, kl, ku, nrhs, ab, ldab, afb, ldafb, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZGEBRD: reduces a general complex M-by-N matrix A to upper or lower + !> bidiagonal form B by a unitary transformation: Q**H * A * P = B. + !> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + pure subroutine stdlib_zgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: taup(*), tauq(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input parameters + info = 0 + nb = max( 1, stdlib_ilaenv( 1, 'ZGEBRD', ' ', m, n, -1, -1 ) ) + lwkopt = ( m+n )*nb + work( 1 ) = real( lwkopt,KIND=dp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda1 .and. nb=( m+n )*nbmin ) then + nb = lwork / ( m+n ) + else + nb = 1 + nx = minmn + end if + end if + end if + else + nx = minmn + end if + do i = 1, minmn - nx, nb + ! reduce rows and columns i:i+ib-1 to bidiagonal form and return + ! the matrices x and y which are needed to update the unreduced + ! part of the matrix + call stdlib_zlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & + taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) + ! update the trailing submatrix a(i+ib:m,i+ib:n), using + ! an update of the form a := a - v*y**h - x*u**h + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& + cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) + + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & + work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) + ! copy diagonal and off-diagonal elements of b back into a + if( m>=n ) then + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j, j+1 ) = e( j ) + end do + else + do j = i, i + nb - 1 + a( j, j ) = d( j ) + a( j+1, j ) = e( j ) + end do + end if + end do + ! use unblocked code to reduce the remainder of the matrix + call stdlib_zgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & + work, iinfo ) + work( 1 ) = ws + return + end subroutine stdlib_zgebrd + + !> ZGEHRD: reduces a complex general matrix A to upper Hessenberg form H by + !> an unitary similarity transformation: Q**H * A * Q = H . + + pure subroutine stdlib_zgehrd( n, ilo, ihi, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, lda, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: nbmax = 64 + integer(ilp), parameter :: ldt = nbmax+1 + integer(ilp), parameter :: tsize = ldt*nbmax + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ib, iinfo, iwt, j, ldwork, lwkopt, nb, nbmin, nh, nx + complex(dp) :: ei + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters + info = 0 + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -2 + else if( ihin ) then + info = -3 + else if( lda1 .and. nb=(n*nbmin + tsize) ) then + nb = (lwork-tsize) / n + else + nb = 1 + end if + end if + end if + end if + ldwork = n + if( nb=nh ) then + ! use unblocked code below + i = ilo + else + ! use blocked code + iwt = 1 + n*nb + do i = ilo, ihi - 1 - nx, nb + ib = min( nb, ihi-i ) + ! reduce columns i:i+ib-1 to hessenberg form, returning the + ! matrices v and t of the block reflector h = i - v*t*v**h + ! which performs the reduction, and also the matrix y = a*v*t + call stdlib_zlahr2( ihi, i, ib, a( 1, i ), lda, tau( i ),work( iwt ), ldt, work, & + ldwork ) + ! apply the block reflector h to a(1:ihi,i+ib:ihi) from the + ! right, computing a := a - y * v**h. v(i+ib,ib-1) must be set + ! to 1 + ei = a( i+ib, i+ib-1 ) + a( i+ib, i+ib-1 ) = cone + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',ihi, ihi-i-ib+1,ib, -& + cone, work, ldwork, a( i+ib, i ), lda, cone,a( 1, i+ib ), lda ) + a( i+ib, i+ib-1 ) = ei + ! apply the block reflector h to a(1:i,i+1:i+ib-1) from the + ! right + call stdlib_ztrmm( 'RIGHT', 'LOWER', 'CONJUGATE TRANSPOSE','UNIT', i, ib-1,cone, & + a( i+1, i ), lda, work, ldwork ) + do j = 0, ib-2 + call stdlib_zaxpy( i, -cone, work( ldwork*j+1 ), 1,a( 1, i+j+1 ), 1 ) + end do + ! apply the block reflector h to a(i+1:ihi,i+ib:n) from the + ! left + call stdlib_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE',ihi-i, & + n-i-ib+1, ib, a( i+1, i ), lda,work( iwt ), ldt, a( i+1, i+ib ), lda,work, & + ldwork ) + end do + end if + ! use unblocked code to reduce the rest of the matrix + call stdlib_zgehd2( n, i, ihi, a, lda, tau, work, iinfo ) + work( 1 ) = lwkopt + return + end subroutine stdlib_zgehrd + + !> ZGELQT: computes a blocked LQ factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_zgelqt( m, n, mb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, mb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + integer(ilp) :: i, ib, iinfo, k + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( mb<1 .or. (mb>min(m,n) .and. min(m,n)>0 ))then + info = -3 + else if( lda ZGELS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, or its conjugate-transpose, using a QR + !> or LQ factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an underdetermined system A**H * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**H * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_zgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tpsd + integer(ilp) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize + real(dp) :: anrm, bignum, bnrm, smlnum + ! Local Arrays + real(dp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: real,max,min + ! Executable Statements + ! test the input arguments. + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + nb = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMQR', 'LN', m, nrhs, n,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMQR', 'LC', m, nrhs, n,-1 ) ) + end if + else + nb = stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 ) + if( tpsd ) then + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMLQ', 'LC', n, nrhs, m,-1 ) ) + else + nb = max( nb, stdlib_ilaenv( 1, 'ZUNMLQ', 'LN', n, nrhs, m,-1 ) ) + end if + end if + wsize = max( 1, mn+max( mn, nrhs )*nb ) + work( 1 ) = real( wsize,KIND=dp) + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZGELS ', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( min( m, n, nrhs )==0 ) then + call stdlib_zlaset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) + return + end if + ! get machine parameters + smlnum = stdlib_dlamch( 'S' ) / stdlib_dlamch( 'P' ) + bignum = one / smlnum + call stdlib_dlabad( smlnum, bignum ) + ! scale a, b if max element outside range [smlnum,bignum] + anrm = stdlib_zlange( 'M', m, n, a, lda, rwork ) + iascl = 0 + if( anrm>zero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if( tpsd )brow = n + bnrm = stdlib_zlange( 'M', brow, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if( m>=n ) then + ! compute qr factorization of a + call stdlib_zgeqrf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least n, optimally n*nb + if( .not.tpsd ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) + call stdlib_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + scllen = n + else + ! underdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) + call stdlib_ztrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& + ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = zero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_zunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_zgelqf( m, n, a, lda, work( 1 ), work( mn+1 ), lwork-mn,info ) + ! workspace at least m, optimally m*nb. + if( .not.tpsd ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_ztrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & + info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) + call stdlib_zunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1 ), & + b, ldb, work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**h * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_zunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1 ), b, ldb,& + work( mn+1 ), lwork-mn,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) + call stdlib_ztrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & + b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_zlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_zlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( wsize,KIND=dp) + return + end subroutine stdlib_zgels + + !> ZGEQP3: computes a QR factorization with column pivoting of a + !> matrix A: A*P = Q*R using Level 3 BLAS. + + pure subroutine stdlib_zgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, m, n + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: tau(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: inb = 1 + integer(ilp), parameter :: inbmin = 2 + integer(ilp), parameter :: ixover = 3 + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, & + sminmn, sn, topbmn + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + ! ==================== + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 ) then + na = min( m, nfxd ) + ! cc call stdlib_zgeqr2( m, na, a, lda, tau, work, info ) + call stdlib_zgeqrf( m, na, a, lda, tau, work, lwork, info ) + iws = max( iws, int( work( 1 ),KIND=ilp) ) + if( na1 ) .and. ( nb=nbmin ) .and. ( nb ZGEQRT: computes a blocked QR factorization of a complex M-by-N matrix A + !> using the compact WY representation of Q. + + pure subroutine stdlib_zgeqrt( m, n, nb, a, lda, t, ldt, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, m, n, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk), parameter :: use_recursive_qr = .true. + integer(ilp) :: i, ib, iinfo, k + + ! Executable Statements + ! test the input arguments + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nb<1 .or. ( nb>min(m,n) .and. min(m,n)>0 ) )then + info = -3 + else if( lda ZGERFS: improves the computed solution to a system of linear + !> equations and provides error bounds and backward error estimates for + !> the solution. + + pure subroutine stdlib_zgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zgetrs( trans, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_zgetrs( transt, n, 1, af, ldaf, ipiv, work, n,info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zgetrs( transn, n, 1, af, ldaf, ipiv, work, n,info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zgerfs + + !> ZGETRF: computes an LU factorization of a general M-by-N matrix A + !> using partial pivoting with row interchanges. + !> The factorization has the form + !> A = P * L * U + !> where P is a permutation matrix, L is lower triangular with unit + !> diagonal elements (lower trapezoidal if m > n), and U is upper + !> triangular (upper trapezoidal if m < n). + !> This is the right-looking Level 3 BLAS version of the algorithm. + + pure subroutine stdlib_zgetrf( m, n, a, lda, ipiv, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: i, iinfo, j, jb, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=min( m, n ) ) then + ! use unblocked code. + call stdlib_zgetrf2( m, n, a, lda, ipiv, info ) + else + ! use blocked code. + do j = 1, min( m, n ), nb + jb = min( min( m, n )-j+1, nb ) + ! factor diagonal and subdiagonal blocks and test for exact + ! singularity. + call stdlib_zgetrf2( m-j+1, jb, a( j, j ), lda, ipiv( j ), iinfo ) + ! adjust info and the pivot indices. + if( info==0 .and. iinfo>0 )info = iinfo + j - 1 + do i = j, min( m, j+jb-1 ) + ipiv( i ) = j - 1 + ipiv( i ) + end do + ! apply interchanges to columns 1:j-1. + call stdlib_zlaswp( j-1, a, lda, j, j+jb-1, ipiv, 1 ) + if( j+jb<=n ) then + ! apply interchanges to columns j+jb:n. + call stdlib_zlaswp( n-j-jb+1, a( 1, j+jb ), lda, j, j+jb-1,ipiv, 1 ) + ! compute block row of u. + call stdlib_ztrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,& + a( j, j ), lda, a( j, j+jb ),lda ) + if( j+jb<=m ) then + ! update trailing submatrix. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -& + cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda ) + + end if + end if + end do + end if + return + end subroutine stdlib_zgetrf + + !> ZGGGLM: solves a general Gauss-Markov linear model (GLM) problem: + !> minimize || y ||_2 subject to d = A*x + B*y + !> x + !> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a + !> given N-vector. It is assumed that M <= N <= M+P, and + !> rank(A) = M and rank( A B ) = N. + !> Under these assumptions, the constrained equation is always + !> consistent, and there is a unique solution x and a minimal 2-norm + !> solution y, which is obtained using a generalized QR factorization + !> of the matrices (A, B) given by + !> A = Q*(R), B = Q*T*Z. + !> (0) + !> In particular, if matrix B is square nonsingular, then the problem + !> GLM is equivalent to the following weighted linear least squares + !> problem + !> minimize || inv(B)*(d-A*x) ||_2 + !> x + !> where inv(B) denotes the inverse of B. + + pure subroutine stdlib_zggglm( n, m, p, a, lda, b, ldb, d, x, y, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), d(*) + complex(dp), intent(out) :: work(*), x(*), y(*) + ! =================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, lopt, lwkmin, lwkopt, nb, nb1, nb2, nb3, nb4, np + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + np = min( n, p ) + lquery = ( lwork==-1 ) + if( n<0 ) then + info = -1 + else if( m<0 .or. m>n ) then + info = -2 + else if( p<0 .or. pm ) then + call stdlib_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', n-m, 1,b( m+1, m+p-n+1 ), & + ldb, d( m+1 ), n-m, info ) + if( info>0 ) then + info = 1 + return + end if + call stdlib_zcopy( n-m, d( m+1 ), 1, y( m+p-n+1 ), 1 ) + end if + ! set y1 = 0 + do i = 1, m + p - n + y( i ) = czero + end do + ! update d1 = d1 - t12*y2 + call stdlib_zgemv( 'NO TRANSPOSE', m, n-m, -cone, b( 1, m+p-n+1 ), ldb,y( m+p-n+1 ), 1,& + cone, d, 1 ) + ! solve triangular system: r11*x = d1 + if( m>0 ) then + call stdlib_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', m, 1, a, lda,d, m, info ) + + if( info>0 ) then + info = 2 + return + end if + ! copy d to x + call stdlib_zcopy( m, d, 1, x, 1 ) + end if + ! backward transformation y = z**h *y + call stdlib_zunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', p, 1, np,b( max( 1, n-p+1 ), 1 ), & + ldb, work( m+1 ), y,max( 1, p ), work( m+np+1 ), lwork-m-np, info ) + work( 1 ) = m + np + max( lopt, int( work( m+np+1 ),KIND=ilp) ) + return + end subroutine stdlib_zggglm + + !> ZGGHD3: reduces a pair of complex matrices (A,B) to generalized upper + !> Hessenberg form using unitary transformations, where A is a + !> general matrix and B is upper triangular. The form of the + !> generalized eigenvalue problem is + !> A*x = lambda*B*x, + !> and B is typically made upper triangular by computing its QR + !> factorization and moving the unitary matrix Q to the left side + !> of the equation. + !> This subroutine simultaneously reduces A to a Hessenberg matrix H: + !> Q**H*A*Z = H + !> and transforms B to another upper triangular matrix T: + !> Q**H*B*Z = T + !> in order to reduce the problem to its standard form + !> H*y = lambda*T*y + !> where y = Z**H*x. + !> The unitary matrices Q and Z are determined as products of Givens + !> rotations. They may either be formed explicitly, or they may be + !> postmultiplied into input matrices Q1 and Z1, so that + !> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H + !> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H + !> If Q1 is the unitary matrix from the QR factorization of B in the + !> original equation A*x = lambda*B*x, then ZGGHD3 reduces the original + !> problem to generalized Hessenberg form. + !> This is a blocked variant of CGGHRD, using matrix-matrix + !> multiplications for parts of the computation to enhance performance. + + pure subroutine stdlib_zgghd3( compq, compz, n, ilo, ihi, a, lda, b, ldb, q,ldq, z, ldz, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz + integer(ilp), intent(in) :: ihi, ilo, lda, ldb, ldq, ldz, n, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: blk22, initq, initz, lquery, wantq, wantz + character :: compq2, compz2 + integer(ilp) :: cola, i, ierr, j, j0, jcol, jj, jrow, k, kacc22, len, lwkopt, n2nb, nb,& + nblst, nbmin, nh, nnb, nx, ppw, ppwo, pw, top, topq + real(dp) :: c + complex(dp) :: c1, c2, ctemp, s, s1, s2, temp, temp1, temp2, temp3 + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,max + ! Executable Statements + ! decode and test the input parameters. + info = 0 + nb = stdlib_ilaenv( 1, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + lwkopt = max( 6*n*nb, 1 ) + work( 1 ) = cmplx( lwkopt,KIND=dp) + initq = stdlib_lsame( compq, 'I' ) + wantq = initq .or. stdlib_lsame( compq, 'V' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 ) then + info = -4 + else if( ihi>n .or. ihi1 )call stdlib_zlaset( 'LOWER', n-1, n-1, czero, czero, b(2, 1), ldb ) + ! quick return if possible + nh = ihi - ilo + 1 + if( nh<=1 ) then + work( 1 ) = cone + return + end if + ! determine the blocksize. + nbmin = stdlib_ilaenv( 2, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + if( nb>1 .and. nb=6*n*nbmin ) then + nb = lwork / ( 6*n ) + else + nb = 1 + end if + end if + end if + end if + if( nb=nh ) then + ! use unblocked code below + jcol = ilo + else + ! use blocked code + kacc22 = stdlib_ilaenv( 16, 'ZGGHD3', ' ', n, ilo, ihi, -1 ) + blk22 = kacc22==2 + do jcol = ilo, ihi-2, nb + nnb = min( nb, ihi-jcol-1 ) + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + ! n2nb denotes the number of 2*nnb-by-2*nnb factors + ! nblst denotes the (possibly smaller) order of the last + ! factor. + n2nb = ( ihi-jcol-1 ) / nnb - 1 + nblst = ihi - jcol - n2nb*nnb + call stdlib_zlaset( 'ALL', nblst, nblst, czero, cone, work, nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_zlaset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! reduce columns jcol:jcol+nnb-1 of a to hessenberg form. + do j = jcol, jcol+nnb-1 + ! reduce jth column of a. store cosines and sines in jth + ! column of a and b, respectively. + do i = ihi, j+2, -1 + temp = a( i-1, j ) + call stdlib_zlartg( temp, a( i, j ), c, s, a( i-1, j ) ) + a( i, j ) = cmplx( c,KIND=dp) + b( i, j ) = s + end do + ! accumulate givens rotations into workspace array. + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + s = b( i, j ) + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp - s*work( jj ) + work( jj ) = conjg( s )*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + ! top denotes the number of top rows in a and b that will + ! not be updated during the next steps. + if( jcol<=2 ) then + top = 0 + else + top = jcol + end if + ! propagate transformations through b and replace stored + ! left sines/cosines by right sines/cosines. + do jj = n, j+1, -1 + ! update jjth column of b. + do i = min( jj+1, ihi ), j+2, -1 + ctemp = a( i, j ) + s = b( i, j ) + temp = b( i, jj ) + b( i, jj ) = ctemp*temp - conjg( s )*b( i-1, jj ) + b( i-1, jj ) = s*temp + ctemp*b( i-1, jj ) + end do + ! annihilate b( jj+1, jj ). + if( jj0 ) then + do i = jj, 1, -1 + c = real( a( j+1+i, j ),KIND=dp) + call stdlib_zrot( ihi-top, a( top+1, j+i+1 ), 1,a( top+1, j+i ), 1, c,-& + conjg( b( j+1+i, j ) ) ) + end do + end if + ! update (j+1)th column of a by transformations from left. + if ( j < jcol + nnb - 1 ) then + len = 1 + j - jcol + ! multiply with the trailing accumulated unitary + ! matrix, which takes the form + ! [ u11 u12 ] + ! u = [ ], + ! [ u21 u22 ] + ! where u21 is a len-by-len matrix and u12 is lower + ! triangular. + jrow = ihi - nblst + 1 + call stdlib_zgemv( 'CONJUGATE', nblst, len, cone, work,nblst, a( jrow, j+1 & + ), 1, czero,work( pw ), 1 ) + ppw = pw + len + do i = jrow, jrow+nblst-len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_ztrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT',nblst-len, work( & + len*nblst + 1 ), nblst,work( pw+len ), 1 ) + call stdlib_zgemv( 'CONJUGATE', len, nblst-len, cone,work( (len+1)*nblst - & + len + 1 ), nblst,a( jrow+nblst-len, j+1 ), 1, cone,work( pw+len ), 1 ) + + ppw = pw + do i = jrow, jrow+nblst-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ! multiply with the other accumulated unitary + ! matrices, which take the form + ! [ u11 u12 0 ] + ! [ ] + ! u = [ u21 u22 0 ], + ! [ ] + ! [ 0 0 i ] + ! where i denotes the (nnb-len)-by-(nnb-len) identity + ! matrix, u21 is a len-by-len upper triangular matrix + ! and u12 is an nnb-by-nnb lower triangular matrix. + ppwo = 1 + nblst*nblst + j0 = jrow - nnb + do jrow = j0, jcol+1, -nnb + ppw = pw + len + do i = jrow, jrow+nnb-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + ppw = pw + do i = jrow+nnb, jrow+nnb+len-1 + work( ppw ) = a( i, j+1 ) + ppw = ppw + 1 + end do + call stdlib_ztrmv( 'UPPER', 'CONJUGATE', 'NON-UNIT', len,work( ppwo + & + nnb ), 2*nnb, work( pw ),1 ) + call stdlib_ztrmv( 'LOWER', 'CONJUGATE', 'NON-UNIT', nnb,work( ppwo + & + 2*len*nnb ),2*nnb, work( pw + len ), 1 ) + call stdlib_zgemv( 'CONJUGATE', nnb, len, cone,work( ppwo ), 2*nnb, a( & + jrow, j+1 ), 1,cone, work( pw ), 1 ) + call stdlib_zgemv( 'CONJUGATE', len, nnb, cone,work( ppwo + 2*len*nnb + & + nnb ), 2*nnb,a( jrow+nnb, j+1 ), 1, cone,work( pw+len ), 1 ) + ppw = pw + do i = jrow, jrow+len+nnb-1 + a( i, j+1 ) = work( ppw ) + ppw = ppw + 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + ! apply accumulated unitary matrices to a. + cola = n - jcol - nnb + 1 + j = ihi - nblst + 1 + call stdlib_zgemm( 'CONJUGATE', 'NO TRANSPOSE', nblst,cola, nblst, cone, work, & + nblst,a( j, jcol+nnb ), lda, czero, work( pw ),nblst ) + call stdlib_zlacpy( 'ALL', nblst, cola, work( pw ), nblst,a( j, jcol+nnb ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of + ! [ u11 u12 ] + ! u = [ ] + ! [ u21 u22 ], + ! where all blocks are nnb-by-nnb, u21 is upper + ! triangular and u12 is lower triangular. + call stdlib_zunm22( 'LEFT', 'CONJUGATE', 2*nnb, cola, nnb,nnb, work( ppwo )& + , 2*nnb,a( j, jcol+nnb ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_zgemm( 'CONJUGATE', 'NO TRANSPOSE', 2*nnb,cola, 2*nnb, cone, & + work( ppwo ), 2*nnb,a( j, jcol+nnb ), lda, czero, work( pw ),2*nnb ) + + call stdlib_zlacpy( 'ALL', 2*nnb, cola, work( pw ), 2*nnb,a( j, jcol+nnb ),& + lda ) + end if + ppwo = ppwo + 4*nnb*nnb + end do + ! apply accumulated unitary matrices to q. + if( wantq ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, q( & + topq, j ), ldq,work, nblst, czero, work( pw ), nh ) + call stdlib_zlacpy( 'ALL', nh, nblst, work( pw ), nh,q( topq, j ), ldq ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_zunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,q( topq, j ), ldq, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, q( topq, j ), ldq,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_zlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,q( topq, j ), ldq ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! accumulate right givens rotations if required. + if ( wantz .or. top>0 ) then + ! initialize small unitary factors that will hold the + ! accumulated givens rotations in workspace. + call stdlib_zlaset( 'ALL', nblst, nblst, czero, cone, work,nblst ) + pw = nblst * nblst + 1 + do i = 1, n2nb + call stdlib_zlaset( 'ALL', 2*nnb, 2*nnb, czero, cone,work( pw ), 2*nnb ) + + pw = pw + 4*nnb*nnb + end do + ! accumulate givens rotations into workspace array. + do j = jcol, jcol+nnb-1 + ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1 + len = 2 + j - jcol + jrow = j + n2nb*nnb + 2 + do i = ihi, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + nblst ) + work( jj + nblst ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - nblst - 1 + end do + ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb + j0 = jrow - nnb + do jrow = j0, j+2, -nnb + ppw = ppwo + len = 2 + j - jcol + do i = jrow+nnb-1, jrow, -1 + ctemp = a( i, j ) + a( i, j ) = czero + s = b( i, j ) + b( i, j ) = czero + do jj = ppw, ppw+len-1 + temp = work( jj + 2*nnb ) + work( jj + 2*nnb ) = ctemp*temp -conjg( s )*work( jj ) + work( jj ) = s*temp + ctemp*work( jj ) + end do + len = len + 1 + ppw = ppw - 2*nnb - 1 + end do + ppwo = ppwo + 4*nnb*nnb + end do + end do + else + call stdlib_zlaset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,a( jcol + 2, & + jcol ), lda ) + call stdlib_zlaset( 'LOWER', ihi - jcol - 1, nnb, czero, czero,b( jcol + 2, & + jcol ), ldb ) + end if + ! apply accumulated unitary matrices to a and b. + if ( top>0 ) then + j = ihi - nblst + 1 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, a( & + 1, j ), lda,work, nblst, czero, work( pw ), top ) + call stdlib_zlacpy( 'ALL', top, nblst, work( pw ), top,a( 1, j ), lda ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_zunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,a( 1, j ), lda, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, a( 1, j ), lda,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_zlacpy( 'ALL', top, 2*nnb, work( pw ), top,a( 1, j ), lda ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + j = ihi - nblst + 1 + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,nblst, nblst, cone, b( & + 1, j ), ldb,work, nblst, czero, work( pw ), top ) + call stdlib_zlacpy( 'ALL', top, nblst, work( pw ), top,b( 1, j ), ldb ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_zunm22( 'RIGHT', 'NO TRANSPOSE', top, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,b( 1, j ), ldb, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', top,2*nnb, 2*nnb, & + cone, b( 1, j ), ldb,work( ppwo ), 2*nnb, czero,work( pw ), top ) + + call stdlib_zlacpy( 'ALL', top, 2*nnb, work( pw ), top,b( 1, j ), ldb ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + ! apply accumulated unitary matrices to z. + if( wantz ) then + j = ihi - nblst + 1 + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + else + topq = 1 + nh = n + end if + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,nblst, nblst, cone, z( & + topq, j ), ldz,work, nblst, czero, work( pw ), nh ) + call stdlib_zlacpy( 'ALL', nh, nblst, work( pw ), nh,z( topq, j ), ldz ) + + ppwo = nblst*nblst + 1 + j0 = j - nnb + do j = j0, jcol+1, -nnb + if ( initq ) then + topq = max( 2, j - jcol + 1 ) + nh = ihi - topq + 1 + end if + if ( blk22 ) then + ! exploit the structure of u. + call stdlib_zunm22( 'RIGHT', 'NO TRANSPOSE', nh, 2*nnb,nnb, nnb, work( & + ppwo ), 2*nnb,z( topq, j ), ldz, work( pw ),lwork-pw+1, ierr ) + else + ! ignore the structure of u. + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', nh,2*nnb, 2*nnb, & + cone, z( topq, j ), ldz,work( ppwo ), 2*nnb, czero, work( pw ),nh ) + + call stdlib_zlacpy( 'ALL', nh, 2*nnb, work( pw ), nh,z( topq, j ), ldz ) + + end if + ppwo = ppwo + 4*nnb*nnb + end do + end if + end do + end if + ! use unblocked code to reduce the rest of the matrix + ! avoid re-initialization of modified q and z. + compq2 = compq + compz2 = compz + if ( jcol/=ilo ) then + if ( wantq )compq2 = 'V' + if ( wantz )compz2 = 'V' + end if + if ( jcol ZGGLSE: solves the linear equality-constrained least squares (LSE) + !> problem: + !> minimize || c - A*x ||_2 subject to B*x = d + !> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given + !> M-vector, and d is a given P-vector. It is assumed that + !> P <= N <= M+P, and + !> rank(B) = P and rank( (A) ) = N. + !> ( (B) ) + !> These conditions ensure that the LSE problem has a unique solution, + !> which is obtained using a generalized RQ factorization of the + !> matrices (B, A) given by + !> B = (0 R)*Q, A = Z*T*Q. + + pure subroutine stdlib_zgglse( m, n, p, a, lda, b, ldb, c, d, x, work, lwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, p + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), c(*), d(*) + complex(dp), intent(out) :: work(*), x(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lopt, lwkmin, lwkopt, mn, nb, nb1, nb2, nb3, nb4, nr + ! Intrinsic Functions + intrinsic :: int,max,min + ! Executable Statements + ! test the input parameters + info = 0 + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( p<0 .or. p>n .or. p0 ) then + call stdlib_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', p, 1,b( 1, n-p+1 ), ldb, d,& + p, info ) + if( info>0 ) then + info = 1 + return + end if + ! put the solution in x + call stdlib_zcopy( p, d, 1, x( n-p+1 ), 1 ) + ! update c1 + call stdlib_zgemv( 'NO TRANSPOSE', n-p, p, -cone, a( 1, n-p+1 ), lda,d, 1, cone, c, & + 1 ) + end if + ! solve r11*x1 = c1 for x1 + if( n>p ) then + call stdlib_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n-p, 1,a, lda, c, n-p, & + info ) + if( info>0 ) then + info = 2 + return + end if + ! put the solutions in x + call stdlib_zcopy( n-p, c, 1, x, 1 ) + end if + ! compute the residual vector: + if( m0 )call stdlib_zgemv( 'NO TRANSPOSE', nr, n-m, -cone, a( n-p+1, m+1 ),lda, d(& + nr+1 ), 1, cone, c( n-p+1 ), 1 ) + else + nr = p + end if + if( nr>0 ) then + call stdlib_ztrmv( 'UPPER', 'NO TRANSPOSE', 'NON UNIT', nr,a( n-p+1, n-p+1 ), lda, & + d, 1 ) + call stdlib_zaxpy( nr, -cone, d, 1, c( n-p+1 ), 1 ) + end if + ! backward transformation x = q**h*x + call stdlib_zunmrq( 'LEFT', 'CONJUGATE TRANSPOSE', n, 1, p, b, ldb,work( 1 ), x, n, & + work( p+mn+1 ), lwork-p-mn, info ) + work( 1 ) = p + mn + max( lopt, int( work( p+mn+1 ),KIND=ilp) ) + return + end subroutine stdlib_zgglse + + !> ZGTCON: estimates the reciprocal of the condition number of a complex + !> tridiagonal matrix A using the LU factorization as computed by + !> ZGTTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zgtcon( norm, n, dl, d, du, du2, ipiv, anorm, rcond,work, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: norm + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: d(*), dl(*), du(*), du2(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: onenrm + integer(ilp) :: i, kase, kase1 + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: cmplx + ! Executable Statements + ! test the input arguments. + info = 0 + onenrm = norm=='1' .or. stdlib_lsame( norm, 'O' ) + if( .not.onenrm .and. .not.stdlib_lsame( norm, 'I' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm ZGTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is tridiagonal, and provides + !> error bounds and backward error estimates for the solution. + + pure subroutine stdlib_zgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2,ipiv, b, ldb, x, & + ldx, ferr, berr, work, rwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: b(ldb,*), d(*), df(*), dl(*), dlf(*), du(*), du2(*), duf(*) + + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + ! Local Scalars + logical(lk) :: notran + character :: transn, transt + integer(ilp) :: count, i, j, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + notran = stdlib_lsame( trans, 'N' ) + if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( trans, & + 'C' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zgttrs( trans, n, 1, dlf, df, duf, du2, ipiv, work, n,info ) + call stdlib_zaxpy( n, cmplx( one,KIND=dp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(op(a)))* + ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(op(a)) is the inverse of op(a) + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(op(a))*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(op(a)) * diag(w), + ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 70 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(op(a)**h). + call stdlib_zgttrs( transt, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else + ! multiply by inv(op(a))*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zgttrs( transn, n, 1, dlf, df, duf, du2, ipiv, work,n, info ) + + end if + go to 70 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_110 + return + end subroutine stdlib_zgtrfs + + !> ZGTSVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, + !> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_zgtsvx( fact, trans, n, nrhs, dl, d, du, dlf, df, duf,du2, ipiv, b, & + ldb, x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: b(ldb,*), d(*), dl(*), du(*) + complex(dp), intent(inout) :: df(*), dlf(*), du2(*), duf(*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact, notran + character :: norm + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + notran = stdlib_lsame( trans, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb1 ) then + call stdlib_zcopy( n-1, dl, 1, dlf, 1 ) + call stdlib_zcopy( n-1, du, 1, duf, 1 ) + end if + call stdlib_zgttrf( n, dlf, df, duf, du2, ipiv, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_zlangt( norm, n, dl, d, du ) + ! compute the reciprocal of the condition number of a. + call stdlib_zgtcon( norm, n, dlf, df, duf, du2, ipiv, anorm, rcond, work,info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zgttrs( trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx,info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zgtrfs( trans, n, nrhs, dl, d, du, dlf, df, duf, du2, ipiv,b, ldb, x, ldx, & + ferr, berr, work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZHBGST: reduces a complex Hermitian-definite banded generalized + !> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, + !> such that C has the same bandwidth as A. + !> B must have been previously factorized as S**H*S by ZPBSTF, using a + !> split Cholesky factorization. A is overwritten by C = X**H*A*X, where + !> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the + !> bandwidth of A. + + pure subroutine stdlib_zhbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,& + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldx, n + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(in) :: bb(ldbb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: update, upper, wantx + integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, & + nrt, nx + real(dp) :: bii + complex(dp) :: ra, ra1, t + ! Intrinsic Functions + intrinsic :: real,conjg,max,min + ! Executable Statements + ! test the input parameters + wantx = stdlib_lsame( vect, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + ka1 = ka + 1 + kb1 = kb + 1 + info = 0 + if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldabn-1 )go to 480 + end if + if( upper ) then + ! transform a, working with the upper triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( kb1, i ),KIND=dp) + ab( ka1, i ) = ( real( ab( ka1, i ),KIND=dp) / bii ) / bii + do j = i + 1, i1 + ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, & + i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),& + KIND=dp)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,& + i ) + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j ) + + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_zdscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_zgerc( n-m, kbt, -cone, x( m+1, i ), 1,bb( kb1-kbt, i )& + , 1, x( m+1, i-kbt ),ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+ka1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_130: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i,i-k+ka+1) + call stdlib_zlartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-& + m ), ra ) + ! create nonzero element a(i-k,i-k+ka+1) outside the + ! band and store it in work(i-k) + t = -bb( kb1-k, i )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1, i-k+ka & + ) + ab( 1, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1, i-k+ka ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j-m )*ab( 1, j+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_zlargv( nrt, ab( 1, j2t ), inca, work( j2t-m ), ka1,rwork(& + j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2-m ),work( j2-m ), ka1 ) + call stdlib_zlacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_zrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), & + conjg( work( j-m ) ) ) + end do + end if + end do loop_130 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kb1-kbt, i )*ra1 + end if + end if + loop_170: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), & + inca, rwork( j2-ka ),work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j-ka,j+1) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( 1, j+1 ) + ab( 1, j+1 ) = rwork( j )*ab( 1, j+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_zlargv( nr, ab( 1, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 ) + + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, & + rwork( j2 ),work( j2 ), ka1 ) + call stdlib_zlacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_zrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), conjg( & + work( j ) ) ) + end do + end if + end do loop_210 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l & + ), inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=dp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=dp) / bii ) / bii + do j = i + 1, i1 + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do j = max( 1, i-ka ), i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do k = i - kbt, i - 1 + do j = i - kbt, k + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - & + conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1, i ),KIND=dp)*bb( i-j+& + 1, j )*conjg( bb( i-k+1,k ) ) + end do + do j = max( 1, i-ka ), i - kbt - 1 + ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + + end do + end do + do j = i, i1 + do k = max( j-ka, i-kbt ), i - 1 + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_zdscal( n-m, one / bii, x( m+1, i ), 1 ) + if( kbt>0 )call stdlib_zgeru( n-m, kbt, -cone, x( m+1, i ), 1,bb( kbt+1, i-& + kbt ), ldbb-1,x( m+1, i-kbt ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions down toward the bottom of the + ! band + loop_360: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i-k+ka1 ) then + ! generate rotation to annihilate a(i-k+ka+1,i) + call stdlib_zlartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )& + , ra ) + ! create nonzero element a(i-k+ka+1,i-k) outside the + ! band and store it in work(i-k) + t = -bb( k+1, i-k )*ra1 + work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k ) + + ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k ) + + ra1 = ra + end if + end if + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + if( update ) then + j2t = max( j2, i+2*ka-k+1 ) + else + j2t = j2 + end if + nrt = ( n-j2t+ka ) / ka1 + do j = j2t, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j-m) + work( j-m ) = work( j-m )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 ) + end do + ! generate rotations in 1st set to annihilate elements which + ! have been created outside the band + if( nrt>0 )call stdlib_zlargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, & + rwork( j2t-m ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2-m ),work( j2-m ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2-m ), work( j2-m ), ka1 ) + call stdlib_zlacgv( nr, work( j2-m ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j2, j1, ka1 + call stdlib_zrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j-m ), work(& + j-m ) ) + end do + end if + end do loop_360 + if( update ) then + if( i2<=n .and. kbt>0 ) then + ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the + ! band and store it in work(i-kbt) + work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1 + end if + end if + loop_400: do k = kb, 1, -1 + if( update ) then + j2 = i - k - 1 + max( 2, k-i0+1 )*ka1 + else + j2 = i - k - 1 + max( 1, k-i0+1 )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+ka+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-& + ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 ) + end do + nr = ( n-j2+ka ) / ka1 + j1 = j2 + ( nr-1 )*ka1 + do j = j1, j2, -ka1 + work( j ) = work( j-ka ) + rwork( j ) = rwork( j-ka ) + end do + do j = j2, j1, ka1 + ! create nonzero element a(j+1,j-ka) outside the band + ! and store it in work(j) + work( j ) = work( j )*ab( ka1, j-ka+1 ) + ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 ) + end do + if( update ) then + if( i-k0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_zlargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), & + ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(& + j2 ),work( j2 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( 1, j2 ), ab( 1, j2+1 ), ab( 2, j2 ),inca, rwork( & + j2 ), work( j2 ), ka1 ) + call stdlib_zlacgv( nr, work( j2 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2 ),work( j2 ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j2, j1, ka1 + call stdlib_zrot( n-m, x( m+1, j ), 1, x( m+1, j+1 ), 1,rwork( j ), work( & + j ) ) + end do + end if + end do loop_440 + do k = 1, kb - 1 + j2 = i - k - 1 + max( 1, k-i0+2 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( n-j2+l ) / ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),& + inca, rwork( j2-m ),work( j2-m ), ka1 ) + end do + end do + if( kb>1 ) then + do j = n - 1, j2 + ka, -1 + rwork( j-m ) = rwork( j-ka-m ) + work( j-m ) = work( j-ka-m ) + end do + end if + end if + go to 10 + 480 continue + ! **************************** phase 2 ***************************** + ! the logical structure of this phase is: + ! update = .true. + ! do i = 1, m + ! use s(i) to update a and create a new bulge + ! apply rotations to push all bulges ka positions upward + ! end do + ! update = .false. + ! do i = m - ka - 1, 2, -1 + ! apply rotations to push all bulges ka positions upward + ! end do + ! to avoid duplicating code, the two loops are merged. + update = .true. + i = 0 + 490 continue + if( update ) then + i = i + 1 + kbt = min( kb, m-i ) + i0 = i + 1 + i1 = max( 1, i-ka ) + i2 = i + kbt - ka1 + if( i>m ) then + update = .false. + i = i - 1 + i0 = m + 1 + if( ka==0 )return + go to 490 + end if + else + i = i - ka + if( i<2 )return + end if + if( i0 )call stdlib_zgeru( nx, kbt, -cone, x( 1, i ), 1,bb( kb, i+1 ), & + ldbb-1, x( 1, i+1 ), ldx ) + end if + ! store a(i1,i) in ra1 for use in next loop over k + ra1 = ab( i1-i+ka1, i ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_610: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_zlargv( nrt, ab( 1, j1+ka ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the left + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( j1 ),work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( j1 ), work( j1 ),ka1 ) + call stdlib_zlacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_zrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), work( j ) ) + + end do + end if + end do loop_610 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1 + end if + end if + loop_650: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the right + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),& + inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j-1,j+ka) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( 1, j+ka-1 ) + ab( 1, j+ka-1 ) = rwork( m-kb+j )*ab( 1, j+ka-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_650 + loop_690: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_zlargv( nr, ab( 1, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-& + kb+j1 ), ka1 ) + ! apply rotations in 2nd set from the left + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, & + rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_zlacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the right + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_zrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), work( & + m-kb+j ) ) + end do + end if + end do loop_690 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the right + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,& + rwork( j1t ),work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + else + ! transform a, working with the lower triangle + if( update ) then + ! form inv(s(i))**h * a * inv(s(i)) + bii = real( bb( 1, i ),KIND=dp) + ab( 1, i ) = ( real( ab( 1, i ),KIND=dp) / bii ) / bii + do j = i1, i - 1 + ab( i-j+1, j ) = ab( i-j+1, j ) / bii + end do + do j = i + 1, min( n, i+ka ) + ab( j-i+1, i ) = ab( j-i+1, i ) / bii + end do + do k = i + 1, i + kbt + do j = k, i + kbt + ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - & + conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1, i ),KIND=dp)*bb( j-i+& + 1, i )*conjg( bb( k-i+1,i ) ) + end do + do j = i + kbt + 1, min( n, i+ka ) + ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + + end do + end do + do j = i1, i + do k = i + 1, min( j+ka, i+kbt ) + ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j ) + end do + end do + if( wantx ) then + ! post-multiply x by inv(s(i)) + call stdlib_zdscal( nx, one / bii, x( 1, i ), 1 ) + if( kbt>0 )call stdlib_zgerc( nx, kbt, -cone, x( 1, i ), 1, bb( 2, i ),1, x( & + 1, i+1 ), ldx ) + end if + ! store a(i,i1) in ra1 for use in next loop over k + ra1 = ab( i-i1+1, i1 ) + end if + ! generate and apply vectors of rotations to chase all the + ! existing bulges ka positions up toward the top of the band + loop_840: do k = 1, kb - 1 + if( update ) then + ! determine the rotations which would annihilate the bulge + ! which has in theory just been created + if( i+k-ka1>0 .and. i+k0 )call stdlib_zlargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( & + j1 ), ka1 ) + if( nr>0 ) then + ! apply rotations in 1st set from the right + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + j1 ), work( j1 ), ka1 ) + end do + ! apply rotations in 1st set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + j1 ),work( j1 ), ka1 ) + call stdlib_zlacgv( nr, work( j1 ), ka1 ) + end if + ! start applying rotations in 1st set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 1st set + do j = j1, j2, ka1 + call stdlib_zrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( j ), conjg( work(& + j ) ) ) + end do + end if + end do loop_840 + if( update ) then + if( i2>0 .and. kbt>0 ) then + ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the + ! band and store it in work(m-kb+i+kbt) + work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1 + end if + end if + loop_880: do k = kb, 1, -1 + if( update ) then + j2 = i + k + 1 - max( 2, k+i0-m )*ka1 + else + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + end if + ! finish applying rotations in 2nd set from the left + do l = kb - k, 1, -1 + nrt = ( j2+ka+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, & + j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 ) + end do + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + do j = j1, j2, ka1 + work( m-kb+j ) = work( m-kb+j+ka ) + rwork( m-kb+j ) = rwork( m-kb+j+ka ) + end do + do j = j1, j2, ka1 + ! create nonzero element a(j+ka,j-1) outside the band + ! and store it in work(m-kb+j) + work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 ) + ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 ) + end do + if( update ) then + if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k ) + end if + end do loop_880 + loop_920: do k = kb, 1, -1 + j2 = i + k + 1 - max( 1, k+i0-m )*ka1 + nr = ( j2+ka-1 ) / ka1 + j1 = j2 - ( nr-1 )*ka1 + if( nr>0 ) then + ! generate rotations in 2nd set to annihilate elements + ! which have been created outside the band + call stdlib_zlargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+& + j1 ), ka1 ) + ! apply rotations in 2nd set from the right + do l = 1, ka - 1 + call stdlib_zlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( & + m-kb+j1 ), work( m-kb+j1 ),ka1 ) + end do + ! apply rotations in 2nd set from both sides to diagonal + ! blocks + call stdlib_zlar2v( nr, ab( 1, j1 ), ab( 1, j1-1 ),ab( 2, j1-1 ), inca, rwork(& + m-kb+j1 ),work( m-kb+j1 ), ka1 ) + call stdlib_zlacgv( nr, work( m-kb+j1 ), ka1 ) + end if + ! start applying rotations in 2nd set from the left + do l = ka - 1, kb - k + 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 ) + end do + if( wantx ) then + ! post-multiply x by product of rotations in 2nd set + do j = j1, j2, ka1 + call stdlib_zrot( nx, x( 1, j ), 1, x( 1, j-1 ), 1,rwork( m-kb+j ), conjg( & + work( m-kb+j ) ) ) + end do + end if + end do loop_920 + do k = 1, kb - 1 + j2 = i + k + 1 - max( 1, k+i0-m+1 )*ka1 + ! finish applying rotations in 1st set from the left + do l = kb - k, 1, -1 + nrt = ( j2+l-1 ) / ka1 + j1t = j2 - ( nrt-1 )*ka1 + if( nrt>0 )call stdlib_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, & + j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 ) + end do + end do + if( kb>1 ) then + do j = 2, i2 - ka + rwork( j ) = rwork( j+ka ) + work( j ) = work( j+ka ) + end do + end if + end if + go to 490 + end subroutine stdlib_zhbgst + + !> ZHBTRD: reduces a complex Hermitian band matrix A to real symmetric + !> tridiagonal form T by a unitary similarity transformation: + !> Q**H * A * Q = T. + + pure subroutine stdlib_zhbtrd( vect, uplo, n, kd, ab, ldab, d, e, q, ldq,work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldq, n + ! Array Arguments + real(dp), intent(out) :: d(*), e(*) + complex(dp), intent(inout) :: ab(ldab,*), q(ldq,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: initq, upper, wantq + integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, & + jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt + real(dp) :: abst + complex(dp) :: t, temp + ! Intrinsic Functions + intrinsic :: abs,real,conjg,max,min + ! Executable Statements + ! test the input parameters + initq = stdlib_lsame( vect, 'V' ) + wantq = initq .or. stdlib_lsame( vect, 'U' ) + upper = stdlib_lsame( uplo, 'U' ) + kd1 = kd + 1 + kdm1 = kd - 1 + incx = ldab - 1 + iqend = 1 + info = 0 + if( .not.wantq .and. .not.stdlib_lsame( vect, 'N' ) ) then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldab1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the upper triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( kd1, 1 ) = real( ab( kd1, 1 ),KIND=dp) + loop_90: do i = 1, n - 2 + ! reduce i-th row of matrix to tridiagonal form + loop_80: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_zlargv( nr, ab( 1, j1-1 ), inca, work( j1 ),kd1, d( j1 ), & + kd1 ) + ! apply rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_zlartv or stdlib_zrot is used + if( nr>=2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_zlartv( nr, ab( l+1, j1-1 ), inca,ab( l, j1 ), inca, & + d( j1 ),work( j1 ), kd1 ) + end do + else + jend = j1 + ( nr-1 )*kd1 + do jinc = j1, jend, kd1 + call stdlib_zrot( kdm1, ab( 2, jinc-1 ), 1,ab( 1, jinc ), 1, d( & + jinc ),work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i,i+k-1) + ! within the band + call stdlib_zlartg( ab( kd-k+3, i+k-2 ),ab( kd-k+2, i+k-1 ), d( i+k-& + 1 ),work( i+k-1 ), temp ) + ab( kd-k+3, i+k-2 ) = temp + ! apply rotation from the right + call stdlib_zrot( k-3, ab( kd-k+4, i+k-2 ), 1,ab( kd-k+3, i+k-1 ), 1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_zlar2v( nr, ab( kd1, j1-1 ), ab( kd1, j1 ),ab( kd, & + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the left + if( nr>0 ) then + call stdlib_zlacgv( nr, work( j1 ), kd1 ) + if( 2*kd-1n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_zlartv( nrt, ab( kd-l, j1+l ), inca,ab( kd-& + l+1, j1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do jin = j1, j1end, kd1 + call stdlib_zrot( kd-1, ab( kd-1, jin+1 ), incx,ab( kd, jin+1 )& + , incx,d( jin ), work( jin ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_zrot( lend, ab( kd-1, last+1 ), incx,ab( kd, & + last+1 ), incx, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_zrot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + conjg( work( j ) ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_zrot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), conjg( & + work( j ) ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j-1,j+kd) outside the band + ! and store it in work + work( j+kd ) = work( j )*ab( 1, j+kd ) + ab( 1, j+kd ) = d( j )*ab( 1, j+kd ) + end do + end do loop_80 + end do loop_90 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( kd, i+1 ) + abst = abs( t ) + ab( kd, i+1 ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i1 ) then + ! reduce to complex hermitian tridiagonal form, working with + ! the lower triangle + nr = 0 + j1 = kdn + 2 + j2 = 1 + ab( 1, 1 ) = real( ab( 1, 1 ),KIND=dp) + loop_210: do i = 1, n - 2 + ! reduce i-th column of matrix to tridiagonal form + loop_200: do k = kdn + 1, 2, -1 + j1 = j1 + kdn + j2 = j2 + kdn + if( nr>0 ) then + ! generate plane rotations to annihilate nonzero + ! elements which have been created outside the band + call stdlib_zlargv( nr, ab( kd1, j1-kd1 ), inca,work( j1 ), kd1, d( j1 )& + , kd1 ) + ! apply plane rotations from one side + ! dependent on the the number of diagonals either + ! stdlib_zlartv or stdlib_zrot is used + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + call stdlib_zlartv( nr, ab( kd1-l, j1-kd1+l ), inca,ab( kd1-l+1, & + j1-kd1+l ), inca,d( j1 ), work( j1 ), kd1 ) + end do + else + jend = j1 + kd1*( nr-1 ) + do jinc = j1, jend, kd1 + call stdlib_zrot( kdm1, ab( kd, jinc-kd ), incx,ab( kd1, jinc-kd )& + , incx,d( jinc ), work( jinc ) ) + end do + end if + end if + if( k>2 ) then + if( k<=n-i+1 ) then + ! generate plane rotation to annihilate a(i+k-1,i) + ! within the band + call stdlib_zlartg( ab( k-1, i ), ab( k, i ),d( i+k-1 ), work( i+k-1 & + ), temp ) + ab( k-1, i ) = temp + ! apply rotation from the left + call stdlib_zrot( k-3, ab( k-2, i+1 ), ldab-1,ab( k-1, i+1 ), ldab-1,& + d( i+k-1 ),work( i+k-1 ) ) + end if + nr = nr + 1 + j1 = j1 - kdn - 1 + end if + ! apply plane rotations from both sides to diagonal + ! blocks + if( nr>0 )call stdlib_zlar2v( nr, ab( 1, j1-1 ), ab( 1, j1 ),ab( 2, j1-1 ),& + inca, d( j1 ),work( j1 ), kd1 ) + ! apply plane rotations from the right + ! dependent on the the number of diagonals either + ! stdlib_zlartv or stdlib_zrot is used + if( nr>0 ) then + call stdlib_zlacgv( nr, work( j1 ), kd1 ) + if( nr>2*kd-1 ) then + do l = 1, kd - 1 + if( j2+l>n ) then + nrt = nr - 1 + else + nrt = nr + end if + if( nrt>0 )call stdlib_zlartv( nrt, ab( l+2, j1-1 ), inca,ab( l+1,& + j1 ), inca, d( j1 ),work( j1 ), kd1 ) + end do + else + j1end = j1 + kd1*( nr-2 ) + if( j1end>=j1 ) then + do j1inc = j1, j1end, kd1 + call stdlib_zrot( kdm1, ab( 3, j1inc-1 ), 1,ab( 2, j1inc ), 1, & + d( j1inc ),work( j1inc ) ) + end do + end if + lend = min( kdm1, n-j2 ) + last = j1end + kd1 + if( lend>0 )call stdlib_zrot( lend, ab( 3, last-1 ), 1,ab( 2, last ),& + 1, d( last ),work( last ) ) + end if + end if + if( wantq ) then + ! accumulate product of plane rotations in q + if( initq ) then + ! take advantage of the fact that q was + ! initially the identity matrix + iqend = max( iqend, j2 ) + i2 = max( 0, k-3 ) + iqaend = 1 + i*kd + if( k==2 )iqaend = iqaend + kd + iqaend = min( iqaend, iqend ) + do j = j1, j2, kd1 + ibl = i - i2 / kdm1 + i2 = i2 + 1 + iqb = max( 1, j-ibl ) + nq = 1 + iqaend - iqb + iqaend = min( iqaend+kd, iqend ) + call stdlib_zrot( nq, q( iqb, j-1 ), 1, q( iqb, j ),1, d( j ), & + work( j ) ) + end do + else + do j = j1, j2, kd1 + call stdlib_zrot( n, q( 1, j-1 ), 1, q( 1, j ), 1,d( j ), work( j & + ) ) + end do + end if + end if + if( j2+kdn>n ) then + ! adjust j2 to keep within the bounds of the matrix + nr = nr - 1 + j2 = j2 - kdn - 1 + end if + do j = j1, j2, kd1 + ! create nonzero element a(j+kd,j-1) outside the + ! band and store it in work + work( j+kd ) = work( j )*ab( kd1, j ) + ab( kd1, j ) = d( j )*ab( kd1, j ) + end do + end do loop_200 + end do loop_210 + end if + if( kd>0 ) then + ! make off-diagonal elements real and copy them to e + do i = 1, n - 1 + t = ab( 2, i ) + abst = abs( t ) + ab( 2, i ) = abst + e( i ) = abst + if( abst/=zero ) then + t = t / abst + else + t = cone + end if + if( i ZHECON: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHETRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zhecon( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_zhetrs( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zhecon + + !> ZHECON_ROOK: estimates the reciprocal of the condition number of a complex + !> Hermitian matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by CHETRF_ROOK. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zhecon_rook( uplo, n, a, lda, ipiv, anorm, rcond, work,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda0 .and. a( i, i )==zero )return + end do + else + ! lower triangular storage: examine d from top to bottom. + do i = 1, n + if( ipiv( i )>0 .and. a( i, i )==zero )return + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_zhetrs_rook( uplo, n, 1, a, lda, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zhecon_rook + + !> ZHEEV: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. + + subroutine stdlib_zheev( jobz, uplo, n, a, lda, w, work, lwork, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, lwork, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indtau, indwrk, iscale, llwork, lwkopt, nb + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_zlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_zhetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_zhetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_zungtr to generate the unitary matrix, then call stdlib_zsteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + call stdlib_zungtr( uplo, n, a, lda, work( indtau ), work( indwrk ),llwork, iinfo ) + + indwrk = inde + n + call stdlib_zsteqr( jobz, n, w, rwork( inde ), a, lda,rwork( indwrk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_zheev + + !> ZHEEVR: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + !> ZHEEVR first reduces the matrix A to tridiagonal form T with a call + !> to ZHETRD. Then, whenever possible, ZHEEVR calls ZSTEMR to compute + !> eigenspectrum using Relatively Robust Representations. ZSTEMR + !> computes eigenvalues by the dqds algorithm, while orthogonal + !> eigenvectors are computed from various "good" L D L^T representations + !> (also known as Relatively Robust Representations). Gram-Schmidt + !> orthogonalization is avoided as far as possible. More specifically, + !> the various steps of the algorithm are as follows. + !> For each unreduced block (submatrix) of T, + !> (a) Compute T - sigma I = L D L^T, so that L and D + !> define all the wanted eigenvalues to high relative accuracy. + !> This means that small relative changes in the entries of D and L + !> cause only small relative changes in the eigenvalues and + !> eigenvectors. The standard (unfactored) representation of the + !> tridiagonal matrix T does not have this property in general. + !> (b) Compute the eigenvalues to suitable accuracy. + !> If the eigenvectors are desired, the algorithm attains full + !> accuracy of the computed eigenvalues only right before + !> the corresponding vectors have to be computed, see steps c) and d). + !> (c) For each cluster of close eigenvalues, select a new + !> shift close to the cluster, find a new factorization, and refine + !> the shifted eigenvalues to suitable accuracy. + !> (d) For each eigenvalue with a large enough relative separation compute + !> the corresponding eigenvector by forming a rank revealing twisted + !> factorization. Go back to (c) for any clusters that remain. + !> The desired accuracy of the output can be specified by the input + !> parameter ABSTOL. + !> For more details, see ZSTEMR's documentation and: + !> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations + !> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," + !> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. + !> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and + !> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, + !> 2004. Also LAPACK Working Note 154. + !> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric + !> tridiagonal eigenvalue/eigenvector problem", + !> Computer Science Division Technical Report No. UCB/CSD-97-971, + !> UC Berkeley, May 1997. + !> Note 1 : ZHEEVR calls ZSTEMR when the full spectrum is requested + !> on machines which conform to the ieee-754 floating point standard. + !> ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and + !> when partial spectrum requests are made. + !> Normal execution of ZSTEMR may create NaNs and infinities and + !> hence may abort due to a floating point exception in environments + !> which do not handle NaNs and infinities in the ieee standard default + !> manner. + + subroutine stdlib_zheevr( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork,rwork, lrwork, iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, liwork, lrwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz, tryrac + character :: order + integer(ilp) :: i, ieeeok, iinfo, imax, indibl, indifl, indisp, indiwo, indrd, indrdd, & + indre, indree, indrwk, indtau, indwk, indwkn, iscale, itmp1, j, jj, liwmin, llwork, & + llrwork, llwrkn, lrwmin, lwkopt, lwmin, nb, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + ieeeok = stdlib_ilaenv( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( ( lwork==-1 ) .or. ( lrwork==-1 ) .or.( liwork==-1 ) ) + lrwmin = max( 1, 24*n ) + liwmin = max( 1, 10*n ) + lwmin = max( 1, 2*n ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=dp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=dp) + end if + end if + if( wantz ) then + z( 1, 1 ) = one + isuppz( 1 ) = 1 + isuppz( 2 ) = 1 + end if + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if (valeig) then + vll = vl + vuu = vu + end if + anrm = stdlib_zlansy( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_zdscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_zdscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! initialize indices into workspaces. note: the iwork indices are + ! used only if stdlib_dsterf or stdlib_zstemr fail. + ! work(indtau:indtau+n-1) stores the complex scalar factors of the + ! elementary reflectors used in stdlib_zhetrd. + indtau = 1 + ! indwk is the starting offset of the remaining complex workspace, + ! and llwork is the remaining complex workspace size. + indwk = indtau + n + llwork = lwork - indwk + 1 + ! rwork(indrd:indrd+n-1) stores the real tridiagonal's diagonal + ! entries. + indrd = 1 + ! rwork(indre:indre+n-1) stores the off-diagonal entries of the + ! tridiagonal matrix from stdlib_zhetrd. + indre = indrd + n + ! rwork(indrdd:indrdd+n-1) is a copy of the diagonal entries over + ! -written by stdlib_zstemr (the stdlib_dsterf path copies the diagonal to w). + indrdd = indre + n + ! rwork(indree:indree+n-1) is a copy of the off-diagonal entries over + ! -written while computing the eigenvalues in stdlib_dsterf and stdlib_zstemr. + indree = indrdd + n + ! indrwk is the starting offset of the left-over real workspace, and + ! llrwork is the remaining workspace size. + indrwk = indree + n + llrwork = lrwork - indrwk + 1 + ! iwork(indibl:indibl+m-1) corresponds to iblock in stdlib_dstebz and + ! stores the block indices of each of the m<=n eigenvalues. + indibl = 1 + ! iwork(indisp:indisp+nsplit-1) corresponds to isplit in stdlib_dstebz and + ! stores the starting and finishing indices of each block. + indisp = indibl + n + ! iwork(indifl:indifl+n-1) stores the indices of eigenvectors + ! that corresponding to eigenvectors that fail to converge in + ! stdlib_dstein. this information is discarded; if any fail, the driver + ! returns info > 0. + indifl = indisp + n + ! indiwo is the offset of the remaining integer workspace. + indiwo = indifl + n + ! call stdlib_zhetrd to reduce hermitian matrix to tridiagonal form. + call stdlib_zhetrd( uplo, n, a, lda, rwork( indrd ), rwork( indre ),work( indtau ), & + work( indwk ), llwork, iinfo ) + ! if all eigenvalues are desired + ! then call stdlib_dsterf or stdlib_zstemr and stdlib_zunmtr. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig.or.test ) .and. ( ieeeok==1 ) ) then + if( .not.wantz ) then + call stdlib_dcopy( n, rwork( indrd ), 1, w, 1 ) + call stdlib_dcopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_dsterf( n, w, rwork( indree ), info ) + else + call stdlib_dcopy( n-1, rwork( indre ), 1, rwork( indree ), 1 ) + call stdlib_dcopy( n, rwork( indrd ), 1, rwork( indrdd ), 1 ) + if (abstol <= two*n*eps) then + tryrac = .true. + else + tryrac = .false. + end if + call stdlib_zstemr( jobz, 'A', n, rwork( indrdd ),rwork( indree ), vl, vu, il, & + iu, m, w,z, ldz, n, isuppz, tryrac,rwork( indrwk ), llrwork,iwork, liwork, info ) + + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_zstemr. + if( wantz .and. info==0 ) then + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_zunmtr( 'L', uplo, 'N', n, m, a, lda,work( indtau ), z, ldz, work(& + indwkn ),llwrkn, iinfo ) + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_zstein. + ! also call stdlib_dstebz and stdlib_zstein if stdlib_zstemr fails. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indrd ), rwork( & + indre ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwo )& + , info ) + if( wantz ) then + call stdlib_zstein( n, rwork( indrd ), rwork( indre ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwo ), iwork( indifl ),info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_zstein. + indwkn = indwk + llwrkn = lwork - indwkn + 1 + call stdlib_zunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwkn ), llwrkn, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHEEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can + !> be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_zheevx( jobz, range, uplo, n, a, lda, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, lwork, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, lda, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, lquery, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, llwork, lwkmin, lwkopt, nb, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + lower = stdlib_lsame( uplo, 'L' ) + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 .and. vu<=vl )info = -8 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -9 + else if( iun ) then + info = -10 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( a( 1, 1 ),KIND=dp) )then + m = 1 + w( 1 ) = real( a( 1, 1 ),KIND=dp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + end if + anrm = stdlib_zlanhe( 'M', uplo, n, a, lda, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + do j = 1, n + call stdlib_zdscal( n-j+1, sigma, a( j, j ), 1 ) + end do + else + do j = 1, n + call stdlib_zdscal( j, sigma, a( 1, j ), 1 ) + end do + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_zhetrd to reduce hermitian matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + llwork = lwork - indwrk + 1 + call stdlib_zhetrd( uplo, n, a, lda, rwork( indd ), rwork( inde ),work( indtau ), work(& + indwrk ), llwork, iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal to + ! zero, then call stdlib_dsterf or stdlib_zungtr and stdlib_zsteqr. if this fails for + ! some eigenvalue, then try stdlib_dstebz. + test = .false. + if( indeig ) then + if( il==1 .and. iu==n ) then + test = .true. + end if + end if + if( ( alleig .or. test ) .and. ( abstol<=zero ) ) then + call stdlib_dcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_dsterf( n, w, rwork( indee ), info ) + else + call stdlib_zlacpy( 'A', n, n, a, lda, z, ldz ) + call stdlib_zungtr( uplo, n, z, ldz, work( indtau ),work( indwrk ), llwork, & + iinfo ) + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_zsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 40 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_zstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_zstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_zstein. + call stdlib_zunmtr( 'L', uplo, 'N', n, m, a, lda, work( indtau ), z,ldz, work( & + indwrk ), llwork, iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 40 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHEGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian and B is also + !> positive definite. + + subroutine stdlib_zhegv( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, lwork, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: lwkopt, nb, neig + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_ztrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_ztrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, neig, cone,b, ldb, a, lda & + ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zhegv + + !> ZHEGVX: computes selected eigenvalues, and optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> Eigenvalues and eigenvectors can be selected by specifying either a + !> range of values or a range of indices for the desired eigenvalues. + + subroutine stdlib_zhegvx( itype, jobz, range, uplo, n, a, lda, b, ldb,vl, vu, il, iu, abstol,& + m, w, z, ldz, work,lwork, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, lda, ldb, ldz, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: alleig, indeig, lquery, upper, valeig, wantz + character :: trans + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lquery = ( lwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( lda0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if (info==0) then + if (ldz<1 .or. (wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + call stdlib_ztrsm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + call stdlib_ztrmm( 'LEFT', uplo, trans, 'NON-UNIT', n, m, cone, b,ldb, z, ldz ) + + end if + end if + ! set work(1) to optimal complex workspace size. + work( 1 ) = lwkopt + return + end subroutine stdlib_zhegvx + + !> ZHERFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite, and + !> provides error bounds and backward error estimates for the solution. + + pure subroutine stdlib_zherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb,x, ldx, ferr, & + berr, work, rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, j, k, kase, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldasafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zhetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_zhetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zhetrs( uplo, n, 1, af, ldaf, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zherfs + + !> ZHESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then + !> used to solve the system of equations A * X = B. + + pure subroutine stdlib_zhesv( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESV_RK: computes the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix + !> and X and B are N-by-NRHS matrices. + !> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used + !> to factor A as + !> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or + !> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', + !> where U (or L) is unit upper (or lower) triangular matrix, + !> U**H (or L**H) is the conjugate of U (or L), P is a permutation + !> matrix, P**T is the transpose of P, and D is Hermitian and block + !> diagonal with 1-by-1 and 2-by-2 diagonal blocks. + !> ZHETRF_RK is called to compute the factorization of a complex + !> Hermitian matrix. The factored form of A is then used to solve + !> the system of equations A * X = B by calling BLAS3 routine ZHETRS_3. + + pure subroutine stdlib_zhesv_rk( uplo, n, nrhs, a, lda, e, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: e(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESV_ROOK: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used + !> to factor A as + !> A = U * D * U**T, if UPLO = 'U', or + !> A = L * D * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and D is Hermitian and block diagonal with + !> 1-by-1 and 2-by-2 diagonal blocks. + !> ZHETRF_ROOK is called to compute the factorization of a complex + !> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal + !> pivoting method. + !> The factored form of A is then used to solve the system + !> of equations A * X = B by calling ZHETRS_ROOK (uses BLAS 2). + + pure subroutine stdlib_zhesv_rook( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, nb + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHESVX: uses the diagonal pivoting factorization to compute the + !> solution to a complex system of linear equations A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zhesvx( fact, uplo, n, nrhs, a, lda, af, ldaf, ipiv, b,ldb, x, ldx, rcond, & + ferr, berr, work, lwork,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, lwork, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: a(lda,*), b(ldb,*) + complex(dp), intent(inout) :: af(ldaf,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, nofact + integer(ilp) :: lwkopt, nb + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + lquery = ( lwork==-1 ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanhe( 'I', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zhecon( uplo, n, af, ldaf, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zhetrs( uplo, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zherfs( uplo, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZHGEQZ: computes the eigenvalues of a complex matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the single-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a complex matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by ZGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices and S and P are upper triangular. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced + !> the matrix pair (A,B) to generalized Hessenberg form, then the output + !> matrices Q1*Q and Z1*Z are the unitary factors from the generalized + !> Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) + !> (equivalently, of (A,B)) are computed as a pair of complex values + !> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an + !> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> The values of alpha and beta for the i-th eigenvalue can be read + !> directly from the generalized Schur form: alpha = S(i,i), + !> beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + + subroutine stdlib_zhgeqz( job, compq, compz, n, ilo, ihi, h, ldh, t, ldt,alpha, beta, q, ldq,& + z, ldz, work, lwork,rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, compz, job + integer(ilp), intent(in) :: ihi, ilo, ldh, ldq, ldt, ldz, lwork, n + integer(ilp), intent(out) :: info + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: alpha(*), beta(*), work(*) + complex(dp), intent(inout) :: h(ldh,*), q(ldq,*), t(ldt,*), z(ldz,*) + ! ===================================================================== + + + + ! Local Scalars + logical(lk) :: ilazr2, ilazro, ilq, ilschr, ilz, lquery + integer(ilp) :: icompq, icompz, ifirst, ifrstm, iiter, ilast, ilastm, in, ischur, & + istart, j, jc, jch, jiter, jr, maxit + real(dp) :: absb, anorm, ascale, atol, bnorm, bscale, btol, c, safmin, temp, temp2, & + tempr, ulp + complex(dp) :: abi22, ad11, ad12, ad21, ad22, ctemp, ctemp2, ctemp3, eshift, s, shift, & + signbc, u12, x, abi12, y + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max,min,sqrt + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode job, compq, compz + if( stdlib_lsame( job, 'E' ) ) then + ilschr = .false. + ischur = 1 + else if( stdlib_lsame( job, 'S' ) ) then + ilschr = .true. + ischur = 2 + else + ilschr = .true. + ischur = 0 + end if + if( stdlib_lsame( compq, 'N' ) ) then + ilq = .false. + icompq = 1 + else if( stdlib_lsame( compq, 'V' ) ) then + ilq = .true. + icompq = 2 + else if( stdlib_lsame( compq, 'I' ) ) then + ilq = .true. + icompq = 3 + else + ilq = .true. + icompq = 0 + end if + if( stdlib_lsame( compz, 'N' ) ) then + ilz = .false. + icompz = 1 + else if( stdlib_lsame( compz, 'V' ) ) then + ilz = .true. + icompz = 2 + else if( stdlib_lsame( compz, 'I' ) ) then + ilz = .true. + icompz = 3 + else + ilz = .true. + icompz = 0 + end if + ! check argument values + info = 0 + work( 1 ) = max( 1, n ) + lquery = ( lwork==-1 ) + if( ischur==0 ) then + info = -1 + else if( icompq==0 ) then + info = -2 + else if( icompz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihisafmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_zscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_zscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_zscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_zscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! if ihi < ilo, skip qz steps + if( ihimaxit )go to 180 + ! split the matrix if possible. + ! two tests: + ! 1: h(j,j-1)=0 or j=ilo + ! 2: t(j,j)=0 + ! special case: j=ilast + if( ilast==ilo ) then + go to 60 + else + if( abs1( h( ilast, ilast-1 ) )<=max( safmin, ulp*(abs1( h( ilast, ilast ) ) + & + abs1( h( ilast-1, ilast-1 )) ) ) ) then + h( ilast, ilast-1 ) = czero + go to 60 + end if + end if + if( abs( t( ilast, ilast ) )<=max( safmin, ulp*(abs( t( ilast - 1, ilast ) ) + abs( & + t( ilast-1, ilast-1 )) ) ) ) then + t( ilast, ilast ) = czero + go to 50 + end if + ! general case: j ilo )temp = temp + abs ( t( j - 1, j ) ) + if( abs( t( j, j ) )=btol ) then + if( jch+1>=ilast ) then + go to 60 + else + ifirst = jch + 1 + go to 70 + end if + end if + t( jch+1, jch+1 ) = czero + end do + go to 50 + else + ! only test 2 passed -- chase the zero to t(ilast,ilast) + ! then process as in the case t(ilast,ilast)=0 + do jch = j, ilast - 1 + ctemp = t( jch, jch+1 ) + call stdlib_zlartg( ctemp, t( jch+1, jch+1 ), c, s,t( jch, jch+1 ) ) + + t( jch+1, jch+1 ) = czero + if( jchsafmin ) then + signbc = conjg( t( ilast, ilast ) / absb ) + t( ilast, ilast ) = absb + if( ilschr ) then + call stdlib_zscal( ilast-ifrstm, signbc, t( ifrstm, ilast ), 1 ) + call stdlib_zscal( ilast+1-ifrstm, signbc, h( ifrstm, ilast ),1 ) + else + call stdlib_zscal( 1, signbc, h( ilast, ilast ), 1 ) + end if + if( ilz )call stdlib_zscal( n, signbc, z( 1, ilast ), 1 ) + else + t( ilast, ilast ) = czero + end if + alpha( ilast ) = h( ilast, ilast ) + beta( ilast ) = t( ilast, ilast ) + ! go to next block -- exit if finished. + ilast = ilast - 1 + if( ilastilast )ifrstm = ilo + end if + go to 160 + ! qz step + ! this iteration only involves rows/columns ifirst:ilast. we + ! assume ifirst < ilast, and that the diagonal of b is non-zero. + 70 continue + iiter = iiter + 1 + if( .not.ilschr ) then + ifrstm = ifirst + end if + ! compute the shift. + ! at this point, ifirst < ilast, and the diagonal elements of + ! t(ifirst:ilast,ifirst,ilast) are larger than btol (in + ! magnitude) + if( ( iiter / 10 )*10/=iiter ) then + ! the wilkinson shift (aep p.512_dp), i.e., the eigenvalue of + ! the bottom-right 2x2 block of a inv(b) which is nearest to + ! the bottom-right element. + ! we factor b as u*d, where u has unit diagonals, and + ! compute (a*inv(d))*inv(u). + u12 = ( bscale*t( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad11 = ( ascale*h( ilast-1, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad21 = ( ascale*h( ilast, ilast-1 ) ) /( bscale*t( ilast-1, ilast-1 ) ) + ad12 = ( ascale*h( ilast-1, ilast ) ) /( bscale*t( ilast, ilast ) ) + ad22 = ( ascale*h( ilast, ilast ) ) /( bscale*t( ilast, ilast ) ) + abi22 = ad22 - u12*ad21 + abi12 = ad12 - u12*ad11 + shift = abi22 + ctemp = sqrt( abi12 )*sqrt( ad21 ) + temp = abs1( ctemp ) + if( ctemp/=zero ) then + x = half*( ad11-shift ) + temp2 = abs1( x ) + temp = max( temp, abs1( x ) ) + y = temp*sqrt( ( x / temp )**2+( ctemp / temp )**2 ) + if( temp2>zero ) then + if( real( x / temp2,KIND=dp)*real( y,KIND=dp)+aimag( x / temp2 )*aimag( y )& + safmin ) & + then + eshift = eshift + ( ascale*h( ilast,ilast ) )/( bscale*t( ilast, ilast ) ) + + else + eshift = eshift + ( ascale*h( ilast,ilast-1 ) )/( bscale*t( ilast-1, ilast-1 )& + ) + end if + shift = eshift + end if + ! now check for two consecutive small subdiagonals. + do j = ilast - 1, ifirst + 1, -1 + istart = j + ctemp = ascale*h( j, j ) - shift*( bscale*t( j, j ) ) + temp = abs1( ctemp ) + temp2 = ascale*abs1( h( j+1, j ) ) + tempr = max( temp, temp2 ) + if( tempristart ) then + ctemp = h( j, j-1 ) + call stdlib_zlartg( ctemp, h( j+1, j-1 ), c, s, h( j, j-1 ) ) + h( j+1, j-1 ) = czero + end if + do jc = j, ilastm + ctemp = c*h( j, jc ) + s*h( j+1, jc ) + h( j+1, jc ) = -conjg( s )*h( j, jc ) + c*h( j+1, jc ) + h( j, jc ) = ctemp + ctemp2 = c*t( j, jc ) + s*t( j+1, jc ) + t( j+1, jc ) = -conjg( s )*t( j, jc ) + c*t( j+1, jc ) + t( j, jc ) = ctemp2 + end do + if( ilq ) then + do jr = 1, n + ctemp = c*q( jr, j ) + conjg( s )*q( jr, j+1 ) + q( jr, j+1 ) = -s*q( jr, j ) + c*q( jr, j+1 ) + q( jr, j ) = ctemp + end do + end if + ctemp = t( j+1, j+1 ) + call stdlib_zlartg( ctemp, t( j+1, j ), c, s, t( j+1, j+1 ) ) + t( j+1, j ) = czero + do jr = ifrstm, min( j+2, ilast ) + ctemp = c*h( jr, j+1 ) + s*h( jr, j ) + h( jr, j ) = -conjg( s )*h( jr, j+1 ) + c*h( jr, j ) + h( jr, j+1 ) = ctemp + end do + do jr = ifrstm, j + ctemp = c*t( jr, j+1 ) + s*t( jr, j ) + t( jr, j ) = -conjg( s )*t( jr, j+1 ) + c*t( jr, j ) + t( jr, j+1 ) = ctemp + end do + if( ilz ) then + do jr = 1, n + ctemp = c*z( jr, j+1 ) + s*z( jr, j ) + z( jr, j ) = -conjg( s )*z( jr, j+1 ) + c*z( jr, j ) + z( jr, j+1 ) = ctemp + end do + end if + end do loop_150 + 160 continue + end do loop_170 + ! drop-through = non-convergence + 180 continue + info = ilast + go to 210 + ! successful completion of all qz steps + 190 continue + ! set eigenvalues 1:ilo-1 + do j = 1, ilo - 1 + absb = abs( t( j, j ) ) + if( absb>safmin ) then + signbc = conjg( t( j, j ) / absb ) + t( j, j ) = absb + if( ilschr ) then + call stdlib_zscal( j-1, signbc, t( 1, j ), 1 ) + call stdlib_zscal( j, signbc, h( 1, j ), 1 ) + else + call stdlib_zscal( 1, signbc, h( j, j ), 1 ) + end if + if( ilz )call stdlib_zscal( n, signbc, z( 1, j ), 1 ) + else + t( j, j ) = czero + end if + alpha( j ) = h( j, j ) + beta( j ) = t( j, j ) + end do + ! normal termination + info = 0 + ! exit (other than argument error) -- return optimal workspace size + 210 continue + work( 1 ) = cmplx( n,KIND=dp) + return + end subroutine stdlib_zhgeqz + + !> ZHPCON: estimates the reciprocal of the condition number of a complex + !> Hermitian packed matrix A using the factorization A = U*D*U**H or + !> A = L*D*L**H computed by ZHPTRF. + !> An estimate is obtained for norm(inv(A)), and the reciprocal of the + !> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). + + pure subroutine stdlib_zhpcon( uplo, n, ap, ipiv, anorm, rcond, work, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + real(dp), intent(in) :: anorm + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + complex(dp), intent(in) :: ap(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: i, ip, kase + real(dp) :: ainvnm + ! Local Arrays + integer(ilp) :: isave(3) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( anorm0 .and. ap( ip )==zero )return + ip = ip - i + end do + else + ! lower triangular storage: examine d from top to bottom. + ip = 1 + do i = 1, n + if( ipiv( i )>0 .and. ap( ip )==zero )return + ip = ip + n - i + 1 + end do + end if + ! estimate the 1-norm of the inverse. + kase = 0 + 30 continue + call stdlib_zlacn2( n, work( n+1 ), work, ainvnm, kase, isave ) + if( kase/=0 ) then + ! multiply by inv(l*d*l**h) or inv(u*d*u**h). + call stdlib_zhptrs( uplo, n, 1, ap, ipiv, work, n, info ) + go to 30 + end if + ! compute the estimate of the reciprocal condition number. + if( ainvnm/=zero )rcond = ( one / ainvnm ) / anorm + return + end subroutine stdlib_zhpcon + + !> ZHPEV: computes all the eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix in packed storage. + + subroutine stdlib_zhpev( jobz, uplo, n, ap, w, z, ldz, work, rwork,info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_zdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_zhptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + call stdlib_zhptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_zupgtr to generate the orthogonal matrix, then call stdlib_zsteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + indwrk = indtau + n + call stdlib_zupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + indrwk = inde + n + call stdlib_zsteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_zhpev + + !> ZHPEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian matrix A in packed storage. + !> Eigenvalues/vectors can be selected by specifying either a range of + !> values or a range of indices for the desired eigenvalues. + + subroutine stdlib_zhpevx( jobz, range, uplo, n, ap, vl, vu, il, iu,abstol, m, w, z, ldz, & + work, rwork, iwork,ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indtau, indwrk, iscale, itmp1, j, jj, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -3 + else if( n<0 ) then + info = -4 + else + if( valeig ) then + if( n>0 .and. vu<=vl )info = -7 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -8 + else if( iun ) then + info = -9 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=real( ap( 1 ),KIND=dp) ) then + m = 1 + w( 1 ) = real( ap( 1 ),KIND=dp) + end if + end if + if( wantz )z( 1, 1 ) = cone + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_zlanhp( 'M', uplo, n, ap, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_zdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_zhptrd to reduce hermitian packed matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indtau = 1 + indwrk = indtau + n + call stdlib_zhptrd( uplo, n, ap, rwork( indd ), rwork( inde ),work( indtau ), iinfo ) + + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_dsterf or stdlib_zupgtr and stdlib_zsteqr. if this fails + ! for some eigenvalue, then try stdlib_dstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_dcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_dsterf( n, w, rwork( indee ), info ) + else + call stdlib_zupgtr( uplo, n, ap, work( indtau ), z, ldz,work( indwrk ), iinfo ) + + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_zsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 20 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_zstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_zstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_zstein. + indwrk = indtau + n + call stdlib_zupmtr( 'L', uplo, 'N', n, m, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 20 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHPGV: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + !> Here A and B are assumed to be Hermitian, stored in packed format, + !> and B is also positive definite. + + subroutine stdlib_zhpgv( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*), bp(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: trans + integer(ilp) :: j, neig + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_ztpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_ztpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_zhpgv + + !> ZHPGVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. Eigenvalues and eigenvectors can be selected by + !> specifying either a range of values or a range of indices for the + !> desired eigenvalues. + + subroutine stdlib_zhpgvx( itype, jobz, range, uplo, n, ap, bp, vl, vu,il, iu, abstol, m, w, & + z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, itype, iu, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*), bp(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: alleig, indeig, upper, valeig, wantz + character :: trans + integer(ilp) :: j + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -3 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else + if( valeig ) then + if( n>0 .and. vu<=vl ) then + info = -9 + end if + else if( indeig ) then + if( il<1 ) then + info = -10 + else if( iun ) then + info = -11 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz0 )m = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, m + call stdlib_ztpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, m + call stdlib_ztpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + return + end subroutine stdlib_zhpgvx + + !> ZHPRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian indefinite + !> and packed, and provides error bounds and backward error estimates + !> for the solution. + + pure subroutine stdlib_zhprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx,ferr, berr, work,& + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(in) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(in) :: afp(*), ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ik, j, k, kase, kk, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk + complex(dp) :: zdum + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zhptrs( uplo, n, 1, afp, ipiv, work, n, info ) + call stdlib_zaxpy( n, cone, work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + ! use stdlib_zlacn2 to estimate the infinity-norm of the matrix + ! inv(a) * diag(w), + ! where w = abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + kase = 0 + 100 continue + call stdlib_zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! multiply by diag(w)*inv(a**h). + call stdlib_zhptrs( uplo, n, 1, afp, ipiv, work, n, info ) + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + else if( kase==2 ) then + ! multiply by inv(a)*diag(w). + do i = 1, n + work( i ) = rwork( i )*work( i ) + end do + call stdlib_zhptrs( uplo, n, 1, afp, ipiv, work, n, info ) + end if + go to 100 + end if + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, cabs1( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_140 + return + end subroutine stdlib_zhprfs + + !> ZHPSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix stored in packed format and X + !> and B are N-by-NRHS matrices. + !> The diagonal pivoting method is used to factor A as + !> A = U * D * U**H, if UPLO = 'U', or + !> A = L * D * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, D is Hermitian and block diagonal with 1-by-1 + !> and 2-by-2 diagonal blocks. The factored form of A is then used to + !> solve the system of equations A * X = B. + + pure subroutine stdlib_zhpsv( uplo, n, nrhs, ap, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: ap(*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb ZHPSVX: uses the diagonal pivoting factorization A = U*D*U**H or + !> A = L*D*L**H to compute the solution to a complex system of linear + !> equations A * X = B, where A is an N-by-N Hermitian matrix stored + !> in packed format and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zhpsvx( fact, uplo, n, nrhs, ap, afp, ipiv, b, ldb, x,ldx, rcond, ferr, & + berr, work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + complex(dp), intent(inout) :: afp(*) + complex(dp), intent(in) :: ap(*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanhp( 'I', uplo, n, ap, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zhpcon( uplo, n, afp, ipiv, anorm, rcond, work, info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zhptrs( uplo, n, nrhs, afp, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zhprfs( uplo, n, nrhs, ap, afp, ipiv, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZHSEIN: uses inverse iteration to find specified right and/or left + !> eigenvectors of a complex upper Hessenberg matrix H. + !> The right eigenvector x and the left eigenvector y of the matrix H + !> corresponding to an eigenvalue w are defined by: + !> H * x = w * x, y**h * H = w * y**h + !> where y**h denotes the conjugate transpose of the vector y. + + subroutine stdlib_zhsein( side, eigsrc, initv, select, n, h, ldh, w, vl,ldvl, vr, ldvr, mm, & + m, work, rwork, ifaill,ifailr, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: eigsrc, initv, side + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldh, ldvl, ldvr, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: ifaill(*), ifailr(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(in) :: h(ldh,*) + complex(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*), w(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + real(dp), parameter :: rzero = 0.0e+0_dp + + + ! Local Scalars + logical(lk) :: bothv, fromqr, leftv, noinit, rightv + integer(ilp) :: i, iinfo, k, kl, kln, kr, ks, ldwork + real(dp) :: eps3, hnorm, smlnum, ulp, unfl + complex(dp) :: cdum, wk + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! decode and test the input parameters. + bothv = stdlib_lsame( side, 'B' ) + rightv = stdlib_lsame( side, 'R' ) .or. bothv + leftv = stdlib_lsame( side, 'L' ) .or. bothv + fromqr = stdlib_lsame( eigsrc, 'Q' ) + noinit = stdlib_lsame( initv, 'N' ) + ! set m to the number of columns required to store the selected + ! eigenvectors. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + info = 0 + if( .not.rightv .and. .not.leftv ) then + info = -1 + else if( .not.fromqr .and. .not.stdlib_lsame( eigsrc, 'N' ) ) then + info = -2 + else if( .not.noinit .and. .not.stdlib_lsame( initv, 'U' ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldhkr ) then + do i = k, n - 1 + if( h( i+1, i )==czero )go to 50 + end do + 50 continue + kr = i + end if + end if + if( kl/=kln ) then + kln = kl + ! compute infinity-norm of submatrix h(kl:kr,kl:kr) if it + ! has not ben computed before. + hnorm = stdlib_zlanhs( 'I', kr-kl+1, h( kl, kl ), ldh, rwork ) + if( stdlib_disnan( hnorm ) ) then + info = -6 + return + else if( hnorm>rzero ) then + eps3 = hnorm*ulp + else + eps3 = smlnum + end if + end if + ! perturb eigenvalue if it is close to any previous + ! selected eigenvalues affiliated to the submatrix + ! h(kl:kr,kl:kr). close roots are modified by eps3. + wk = w( k ) + 60 continue + do i = k - 1, kl, -1 + if( select( i ) .and. cabs1( w( i )-wk )0 ) then + info = info + 1 + ifaill( ks ) = k + else + ifaill( ks ) = 0 + end if + do i = 1, kl - 1 + vl( i, ks ) = czero + end do + end if + if( rightv ) then + ! compute right eigenvector. + call stdlib_zlaein( .true., noinit, kr, h, ldh, wk, vr( 1, ks ),work, ldwork, & + rwork, eps3, smlnum, iinfo ) + if( iinfo>0 ) then + info = info + 1 + ifailr( ks ) = k + else + ifailr( ks ) = 0 + end if + do i = kr + 1, n + vr( i, ks ) = czero + end do + end if + ks = ks + 1 + end if + end do loop_100 + return + end subroutine stdlib_zhsein + + !> Using the divide and conquer method, ZLAED0: computes all eigenvalues + !> of a symmetric tridiagonal matrix which is one diagonal block of + !> those from reducing a dense or band Hermitian matrix and + !> corresponding eigenvectors of the dense or band matrix. + + pure subroutine stdlib_zlaed0( qsiz, n, d, e, q, ldq, qstore, ldqs, rwork,iwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldq, ldqs, n, qsiz + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: q(ldq,*) + complex(dp), intent(out) :: qstore(ldqs,*) + ! ===================================================================== + ! warning: n could be as big as qsiz! + + ! Local Scalars + integer(ilp) :: curlvl, curprb, curr, i, igivcl, igivnm, igivpt, indxq, iperm, iprmpt, & + iq, iqptr, iwrem, j, k, lgn, ll, matsiz, msd2, smlsiz, smm1, spm1, spm2, submat, & + subpbs, tlvls + real(dp) :: temp + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max + ! Executable Statements + ! test the input parameters. + info = 0 + ! if( icompq < 0 .or. icompq > 2 ) then + ! info = -1 + ! else if( ( icompq == 1 ) .and. ( qsiz < max( 0, n ) ) ) + ! $ then + if( qsizsmlsiz ) then + do j = subpbs, 1, -1 + iwork( 2*j ) = ( iwork( j )+1 ) / 2 + iwork( 2*j-1 ) = iwork( j ) / 2 + end do + tlvls = tlvls + 1 + subpbs = 2*subpbs + go to 10 + end if + do j = 2, subpbs + iwork( j ) = iwork( j ) + iwork( j-1 ) + end do + ! divide the matrix into subpbs submatrices of size at most smlsiz+1 + ! using rank-1 modifications (cuts). + spm1 = subpbs - 1 + do i = 1, spm1 + submat = iwork( i ) + 1 + smm1 = submat - 1 + d( smm1 ) = d( smm1 ) - abs( e( smm1 ) ) + d( submat ) = d( submat ) - abs( e( smm1 ) ) + end do + indxq = 4*n + 3 + ! set up workspaces for eigenvalues only/accumulate new vectors + ! routine + temp = log( real( n,KIND=dp) ) / log( two ) + lgn = int( temp,KIND=ilp) + if( 2**lgn0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + k = 1 + do j = submat, iwork( i+1 ) + iwork( indxq+j ) = k + k = k + 1 + end do + end do + ! successively merge eigensystems of adjacent submatrices + ! into eigensystem for the corresponding larger matrix. + ! while ( subpbs > 1 ) + curlvl = 1 + 80 continue + if( subpbs>1 ) then + spm2 = subpbs - 2 + do i = 0, spm2, 2 + if( i==0 ) then + submat = 1 + matsiz = iwork( 2 ) + msd2 = iwork( 1 ) + curprb = 0 + else + submat = iwork( i ) + 1 + matsiz = iwork( i+2 ) - iwork( i ) + msd2 = matsiz / 2 + curprb = curprb + 1 + end if + ! merge lower order eigensystems (of size msd2 and matsiz - msd2) + ! into an eigensystem of size matsiz. stdlib_zlaed7 handles the case + ! when the eigenvectors of a full or band hermitian matrix (which + ! was reduced to tridiagonal form) are desired. + ! i am free to use q as a valuable working space until loop 150. + call stdlib_zlaed7( matsiz, msd2, qsiz, tlvls, curlvl, curprb,d( submat ), & + qstore( 1, submat ), ldqs,e( submat+msd2-1 ), iwork( indxq+submat ),rwork( iq ), & + iwork( iqptr ), iwork( iprmpt ),iwork( iperm ), iwork( igivpt ),iwork( igivcl ), & + rwork( igivnm ),q( 1, submat ), rwork( iwrem ),iwork( subpbs+1 ), info ) + if( info>0 ) then + info = submat*( n+1 ) + submat + matsiz - 1 + return + end if + iwork( i / 2+1 ) = iwork( i+2 ) + end do + subpbs = subpbs / 2 + curlvl = curlvl + 1 + go to 80 + end if + ! end while + ! re-merge the eigenvalues/vectors which were deflated at the final + ! merge step. + do i = 1, n + j = iwork( indxq+i ) + rwork( i ) = d( j ) + call stdlib_zcopy( qsiz, qstore( 1, j ), 1, q( 1, i ), 1 ) + end do + call stdlib_dcopy( n, rwork, 1, d, 1 ) + return + end subroutine stdlib_zlaed0 + + !> ZLAMSWLQ: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product of blocked + !> elementary reflectors computed by short wide LQ + !> factorization (ZLASWLQ) + + pure subroutine stdlib_zlamswlq( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), t(ldt,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * mb + else + lw = m * mb + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( k<0 ) then + info = -5 + else if( m=max(m,n,k))) then + call stdlib_zgemlqt( side, trans, m, n, k, mb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.tran) then + ! multiply q to the last block of c + kk = mod((m-k),(nb-k)) + ctr = (m-k)/(nb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_ztpmlqt('L','C',kk , n, k, 0, mb, a(1,ii), lda,t(1,ctr*k+1), ldt, c(& + 1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+nb) + ctr = ctr - 1 + call stdlib_ztpmlqt('L','C',nb-k , n, k, 0,mb, a(1,i), lda,t(1,ctr*k+1),ldt, c(1,& + 1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:nb) + call stdlib_zgemlqt('L','C',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.notran) then + ! multiply q to the first block of c + kk = mod((m-k),(nb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_zgemlqt('L','N',nb , n, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (i:i+nb,1:n) + call stdlib_ztpmlqt('L','N',nb-k , n, k, 0,mb, a(1,i), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_ztpmlqt('L','N',kk , n, k, 0, mb, a(1,ii), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.notran) then + ! multiply q to the last block of c + kk = mod((n-k),(nb-k)) + ctr = (n-k)/(nb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_ztpmlqt('R','N',m , kk, k, 0, mb, a(1, ii), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(nb-k),nb+1,-(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_ztpmlqt('R','N', m, nb-k, k, 0, mb, a(1, i), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_zgemlqt('R','N',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.tran) then + ! multiply q to the first block of c + kk = mod((n-k),(nb-k)) + ii=n-kk+1 + call stdlib_zgemlqt('R','C',m , nb, k, mb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + ctr = 1 + do i=nb+1,ii-nb+k,(nb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_ztpmlqt('R','C',m , nb-k, k, 0,mb, a(1,i), lda,t(1,ctr *k+1), ldt, c(1,& + 1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_ztpmlqt('R','C',m , kk, k, 0,mb, a(1,ii), lda,t(1, ctr * k + 1),ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_zlamswlq + + !> ZLAMTSQR: overwrites the general complex M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (ZLATSQR) + + pure subroutine stdlib_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, & + lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), t(ldt,*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: c(ldc,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: i, ii, kk, lw, ctr, q + ! External Subroutines + ! Executable Statements + ! test the input arguments + lquery = lwork<0 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + if (left) then + lw = n * nb + q = m + else + lw = m * nb + q = n + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m=max(m,n,k))) then + call stdlib_zgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info) + + return + end if + if(left.and.notran) then + ! multiply q to the last block of c + kk = mod((m-k),(mb-k)) + ctr = (m-k)/(mb-k) + if (kk>0) then + ii=m-kk+1 + call stdlib_ztpmqrt('L','N',kk , n, k, 0, nb, a(ii,1), lda,t(1, ctr * k + 1),ldt ,& + c(1,1), ldc,c(ii,1), ldc, work, info ) + else + ii=m+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + ctr = ctr - 1 + call stdlib_ztpmqrt('L','N',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, & + c(1,1), ldc,c(i,1), ldc, work, info ) + end do + ! multiply q to the first block of c (1:mb,1:n) + call stdlib_zgemqrt('L','N',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (left.and.tran) then + ! multiply q to the first block of c + kk = mod((m-k),(mb-k)) + ii=m-kk+1 + ctr = 1 + call stdlib_zgemqrt('L','C',mb , n, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (i:i+mb,1:n) + call stdlib_ztpmqrt('L','C',mb-k , n, k, 0,nb, a(i,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(i,1), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=m) then + ! multiply q to the last block of c + call stdlib_ztpmqrt('L','C',kk , n, k, 0,nb, a(ii,1), lda,t(1, ctr * k + 1), ldt, & + c(1,1), ldc,c(ii,1), ldc, work, info ) + end if + else if(right.and.tran) then + ! multiply q to the last block of c + kk = mod((n-k),(mb-k)) + ctr = (n-k)/(mb-k) + if (kk>0) then + ii=n-kk+1 + call stdlib_ztpmqrt('R','C',m , kk, k, 0, nb, a(ii,1), lda,t(1,ctr * k + 1), ldt,& + c(1,1), ldc,c(1,ii), ldc, work, info ) + else + ii=n+1 + end if + do i=ii-(mb-k),mb+1,-(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + ctr = ctr - 1 + call stdlib_ztpmqrt('R','C',m , mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1), & + ldt, c(1,1), ldc,c(1,i), ldc, work, info ) + end do + ! multiply q to the first block of c (1:m,1:mb) + call stdlib_zgemqrt('R','C',m , mb, k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + else if (right.and.notran) then + ! multiply q to the first block of c + kk = mod((n-k),(mb-k)) + ii=n-kk+1 + ctr = 1 + call stdlib_zgemqrt('R','N', m, mb , k, nb, a(1,1), lda, t,ldt ,c(1,1), ldc, work, & + info ) + do i=mb+1,ii-mb+k,(mb-k) + ! multiply q to the current block of c (1:m,i:i+mb) + call stdlib_ztpmqrt('R','N', m, mb-k, k, 0,nb, a(i,1), lda,t(1, ctr * k + 1),ldt, & + c(1,1), ldc,c(1,i), ldc, work, info ) + ctr = ctr + 1 + end do + if(ii<=n) then + ! multiply q to the last block of c + call stdlib_ztpmqrt('R','N', m, kk , k, 0,nb, a(ii,1), lda,t(1,ctr * k + 1),ldt, c(& + 1,1), ldc,c(1,ii), ldc, work, info ) + end if + end if + work(1) = lw + return + end subroutine stdlib_zlamtsqr + + !> ZLAQR2: is identical to ZLAQR3 except that it avoids + !> recursion by calling ZLAHQR instead of ZLAQR4. + !> Aggressive early deflation: + !> ZLAQR2 accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(dp), parameter :: rzero = 0.0_dp + real(dp), parameter :: rone = 1.0_dp + + + ! Local Scalars + complex(dp) :: beta, cdum, s, tau + real(dp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwkopt + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_zgehrd ==== + call stdlib_zgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_zunmhr ==== + call stdlib_zunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = jw + max( lwk1, lwk2 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_zlaset( 'A', jw, jw, czero, cone, v, ldv ) + call stdlib_zlahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_ztrexc can not fail in this case.) ==== + ifst = ns + call stdlib_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_zcopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_zlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_zlarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_zlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_zlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_zgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_zcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_zunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + end subroutine stdlib_zlaqr2 + + !> ZLASWLQ: computes a blocked Tall-Skinny LQ factorization of + !> a complexx M-by-N matrix A for M <= N: + !> A = ( L 0 ) * Q, + !> where: + !> Q is a n-by-N orthogonal matrix, stored on exit in an implicit + !> form in the elements above the diagonal of the array A and in + !> the elements of the array T; + !> L is a lower-triangular M-by-M matrix stored on exit in + !> the elements on and below the diagonal of the array A. + !> 0 is a M-by-(N-M) zero matrix, if M < N, and is not stored. + + pure subroutine stdlib_zlaswlq( m, n, mb, nb, a, lda, t, ldt, work, lwork,info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, lwork, ldt + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. nm .and. m>0 )) then + info = -3 + else if( nb<=0 ) then + info = -4 + else if( lda=n).or.(nb<=m).or.(nb>=n)) then + call stdlib_zgelqt( m, n, mb, a, lda, t, ldt, work, info) + return + end if + kk = mod((n-m),(nb-m)) + ii=n-kk+1 + ! compute the lq factorization of the first block a(1:m,1:nb) + call stdlib_zgelqt( m, nb, mb, a(1,1), lda, t, ldt, work, info) + ctr = 1 + do i = nb+1, ii-nb+m , (nb-m) + ! compute the qr factorization of the current block a(1:m,i:i+nb-m) + call stdlib_ztplqt( m, nb-m, 0, mb, a(1,1), lda, a( 1, i ),lda, t(1, ctr * m + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(1:m,ii:n) + if (ii<=n) then + call stdlib_ztplqt( m, kk, 0, mb, a(1,1), lda, a( 1, ii ),lda, t(1, ctr * m + 1), & + ldt,work, info ) + end if + work( 1 ) = m * mb + return + end subroutine stdlib_zlaswlq + + !> ZLATSQR: computes a blocked Tall-Skinny QR factorization of + !> a complex M-by-N matrix A for M >= N: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix, stored on exit in an implicit + !> form in the elements below the diagonal of the array A and in + !> the elements of the array T; + !> R is an upper-triangular N-by-N matrix, stored on exit in + !> the elements on and above the diagonal of the array A. + !> 0 is a (M-N)-by-N zero matrix, and is not stored. + + pure subroutine stdlib_zlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, mb, nb, ldt, lwork + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*), t(ldt,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, ii, kk, ctr + ! External Subroutines + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 .or. mn .and. n>0 )) then + info = -4 + else if( lda=m)) then + call stdlib_zgeqrt( m, n, nb, a, lda, t, ldt, work, info) + return + end if + kk = mod((m-n),(mb-n)) + ii=m-kk+1 + ! compute the qr factorization of the first block a(1:mb,1:n) + call stdlib_zgeqrt( mb, n, nb, a(1,1), lda, t, ldt, work, info ) + ctr = 1 + do i = mb+1, ii-mb+n , (mb-n) + ! compute the qr factorization of the current block a(i:i+mb-n,1:n) + call stdlib_ztpqrt( mb-n, n, 0, nb, a(1,1), lda, a( i, 1 ), lda,t(1, ctr * n + 1),& + ldt, work, info ) + ctr = ctr + 1 + end do + ! compute the qr factorization of the last block a(ii:m,1:n) + if (ii<=m) then + call stdlib_ztpqrt( kk, n, 0, nb, a(1,1), lda, a( ii, 1 ), lda,t(1,ctr * n + 1), & + ldt,work, info ) + end if + work( 1 ) = n*nb + return + end subroutine stdlib_zlatsqr + + !> ZPBSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular band matrix, and L is a lower + !> triangular band matrix, with the same number of superdiagonals or + !> subdiagonals as A. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_zpbsv( uplo, n, kd, nrhs, ab, ldab, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: ab(ldab,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( kd<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( ldab ZPBSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite band matrix and X + !> and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zpbsvx( fact, uplo, n, kd, nrhs, ab, ldab, afb, ldafb,equed, s, b, ldb, x, & + ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldafb, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(inout) :: s(*) + complex(dp), intent(inout) :: ab(ldab,*), afb(ldafb,*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ, upper + integer(ilp) :: i, infequ, j, j1, j2 + real(dp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + upper = stdlib_lsame( uplo, 'U' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( nrhs<0 ) then + info = -5 + else if( ldab0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanhb( '1', uplo, n, kd, ab, ldab, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zpbcon( uplo, n, kd, afb, ldafb, anorm, rcond, work, rwork,info ) + ! compute the solution matrix x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zpbtrs( uplo, n, kd, nrhs, afb, ldafb, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_zpbrfs( uplo, n, kd, nrhs, ab, ldab, afb, ldafb, b, ldb, x,ldx, ferr, berr,& + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPFTRF: computes the Cholesky factorization of a complex Hermitian + !> positive definite matrix A. + !> The factorization has the form + !> A = U**H * U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is lower triangular. + !> This is the block version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zpftrf( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(in) :: n + integer(ilp), intent(out) :: info + ! Array Arguments + complex(dp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPFTRF', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution: there are eight cases + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_zpotrf( 'L', n1, a( 0 ), n, info ) + if( info>0 )return + call stdlib_ztrsm( 'R', 'L', 'C', 'N', n2, n1, cone, a( 0 ), n,a( n1 ), n ) + + call stdlib_zherk( 'U', 'N', n2, n1, -one, a( n1 ), n, one,a( n ), n ) + call stdlib_zpotrf( 'U', n2, a( n ), n, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_zpotrf( 'L', n1, a( n2 ), n, info ) + if( info>0 )return + call stdlib_ztrsm( 'L', 'L', 'N', 'N', n1, n2, cone, a( n2 ), n,a( 0 ), n ) + + call stdlib_zherk( 'U', 'C', n2, n1, -one, a( 0 ), n, one,a( n1 ), n ) + call stdlib_zpotrf( 'U', n2, a( n1 ), n, info ) + if( info>0 )info = info + n1 + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is odd + ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1) + ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1 + call stdlib_zpotrf( 'U', n1, a( 0 ), n1, info ) + if( info>0 )return + call stdlib_ztrsm( 'L', 'U', 'C', 'N', n1, n2, cone, a( 0 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_zherk( 'L', 'C', n2, n1, -one, a( n1*n1 ), n1, one,a( 1 ), n1 ) + + call stdlib_zpotrf( 'L', n2, a( 1 ), n1, info ) + if( info>0 )info = info + n1 + else + ! srpa for upper, transpose and n is odd + ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0) + ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2 + call stdlib_zpotrf( 'U', n1, a( n2*n2 ), n2, info ) + if( info>0 )return + call stdlib_ztrsm( 'R', 'U', 'N', 'N', n2, n1, cone, a( n2*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_zherk( 'L', 'N', n2, n1, -one, a( 0 ), n2, one,a( n1*n2 ), n2 ) + + call stdlib_zpotrf( 'L', n2, a( n1*n2 ), n2, info ) + if( info>0 )info = info + n1 + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_zpotrf( 'L', k, a( 1 ), n+1, info ) + if( info>0 )return + call stdlib_ztrsm( 'R', 'L', 'C', 'N', k, k, cone, a( 1 ), n+1,a( k+1 ), n+1 ) + + call stdlib_zherk( 'U', 'N', k, k, -one, a( k+1 ), n+1, one,a( 0 ), n+1 ) + + call stdlib_zpotrf( 'U', k, a( 0 ), n+1, info ) + if( info>0 )info = info + k + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_zpotrf( 'L', k, a( k+1 ), n+1, info ) + if( info>0 )return + call stdlib_ztrsm( 'L', 'L', 'N', 'N', k, k, cone, a( k+1 ),n+1, a( 0 ), n+1 ) + + call stdlib_zherk( 'U', 'C', k, k, -one, a( 0 ), n+1, one,a( k ), n+1 ) + + call stdlib_zpotrf( 'U', k, a( k ), n+1, info ) + if( info>0 )info = info + k + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1) + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_zpotrf( 'U', k, a( 0+k ), k, info ) + if( info>0 )return + call stdlib_ztrsm( 'L', 'U', 'C', 'N', k, k, cone, a( k ), n1,a( k*( k+1 ) ), & + k ) + call stdlib_zherk( 'L', 'C', k, k, -one, a( k*( k+1 ) ), k, one,a( 0 ), k ) + + call stdlib_zpotrf( 'L', k, a( 0 ), k, info ) + if( info>0 )info = info + k + else + ! srpa for upper, transpose and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0) + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_zpotrf( 'U', k, a( k*( k+1 ) ), k, info ) + if( info>0 )return + call stdlib_ztrsm( 'R', 'U', 'N', 'N', k, k, cone,a( k*( k+1 ) ), k, a( 0 ), & + k ) + call stdlib_zherk( 'L', 'N', k, k, -one, a( 0 ), k, one,a( k*k ), k ) + call stdlib_zpotrf( 'L', k, a( k*k ), k, info ) + if( info>0 )info = info + k + end if + end if + end if + return + end subroutine stdlib_zpftrf + + !> ZPFTRI: computes the inverse of a complex Hermitian positive definite + !> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + !> computed by ZPFTRF. + + pure subroutine stdlib_zpftri( transr, uplo, n, a, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: transr, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: n + ! Array Arguments + complex(dp), intent(inout) :: a(0:*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, nisodd, normaltransr + integer(ilp) :: n1, n2, k + ! Intrinsic Functions + intrinsic :: mod + ! Executable Statements + ! test the input parameters. + info = 0 + normaltransr = stdlib_lsame( transr, 'N' ) + lower = stdlib_lsame( uplo, 'L' ) + if( .not.normaltransr .and. .not.stdlib_lsame( transr, 'C' ) ) then + info = -1 + else if( .not.lower .and. .not.stdlib_lsame( uplo, 'U' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZPFTRI', -info ) + return + end if + ! quick return if possible + if( n==0 )return + ! invert the triangular cholesky factor u or l. + call stdlib_ztftri( transr, uplo, 'N', n, a, info ) + if( info>0 )return + ! if n is odd, set nisodd = .true. + ! if n is even, set k = n/2 and nisodd = .false. + if( mod( n, 2 )==0 ) then + k = n / 2 + nisodd = .false. + else + nisodd = .true. + end if + ! set n1 and n2 depending on lower + if( lower ) then + n2 = n / 2 + n1 = n - n2 + else + n1 = n / 2 + n2 = n - n1 + end if + ! start execution of triangular matrix multiply: inv(u)*inv(u)^c or + ! inv(l)^c*inv(l). there are eight cases. + if( nisodd ) then + ! n is odd + if( normaltransr ) then + ! n is odd and transr = 'n' + if( lower ) then + ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) ) + ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0) + ! t1 -> a(0), t2 -> a(n), s -> a(n1) + call stdlib_zlauum( 'L', n1, a( 0 ), n, info ) + call stdlib_zherk( 'L', 'C', n1, n2, one, a( n1 ), n, one,a( 0 ), n ) + call stdlib_ztrmm( 'L', 'U', 'N', 'N', n2, n1, cone, a( n ), n,a( n1 ), n ) + + call stdlib_zlauum( 'U', n2, a( n ), n, info ) + else + ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1) + ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0) + ! t1 -> a(n2), t2 -> a(n1), s -> a(0) + call stdlib_zlauum( 'L', n1, a( n2 ), n, info ) + call stdlib_zherk( 'L', 'N', n1, n2, one, a( 0 ), n, one,a( n2 ), n ) + call stdlib_ztrmm( 'R', 'U', 'C', 'N', n1, n2, cone, a( n1 ), n,a( 0 ), n ) + + call stdlib_zlauum( 'U', n2, a( n1 ), n, info ) + end if + else + ! n is odd and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is odd + ! t1 -> a(0), t2 -> a(1), s -> a(0+n1*n1) + call stdlib_zlauum( 'U', n1, a( 0 ), n1, info ) + call stdlib_zherk( 'U', 'N', n1, n2, one, a( n1*n1 ), n1, one,a( 0 ), n1 ) + + call stdlib_ztrmm( 'R', 'L', 'N', 'N', n1, n2, cone, a( 1 ), n1,a( n1*n1 ), & + n1 ) + call stdlib_zlauum( 'L', n2, a( 1 ), n1, info ) + else + ! srpa for upper, transpose, and n is odd + ! t1 -> a(0+n2*n2), t2 -> a(0+n1*n2), s -> a(0) + call stdlib_zlauum( 'U', n1, a( n2*n2 ), n2, info ) + call stdlib_zherk( 'U', 'C', n1, n2, one, a( 0 ), n2, one,a( n2*n2 ), n2 ) + + call stdlib_ztrmm( 'L', 'L', 'C', 'N', n2, n1, cone, a( n1*n2 ),n2, a( 0 ), & + n2 ) + call stdlib_zlauum( 'L', n2, a( n1*n2 ), n2, info ) + end if + end if + else + ! n is even + if( normaltransr ) then + ! n is even and transr = 'n' + if( lower ) then + ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0) + ! t1 -> a(1), t2 -> a(0), s -> a(k+1) + call stdlib_zlauum( 'L', k, a( 1 ), n+1, info ) + call stdlib_zherk( 'L', 'C', k, k, one, a( k+1 ), n+1, one,a( 1 ), n+1 ) + + call stdlib_ztrmm( 'L', 'U', 'N', 'N', k, k, cone, a( 0 ), n+1,a( k+1 ), n+1 ) + + call stdlib_zlauum( 'U', k, a( 0 ), n+1, info ) + else + ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) ) + ! t1 -> a(k+1,0) , t2 -> a(k,0), s -> a(0,0) + ! t1 -> a(k+1), t2 -> a(k), s -> a(0) + call stdlib_zlauum( 'L', k, a( k+1 ), n+1, info ) + call stdlib_zherk( 'L', 'N', k, k, one, a( 0 ), n+1, one,a( k+1 ), n+1 ) + + call stdlib_ztrmm( 'R', 'U', 'C', 'N', k, k, cone, a( k ), n+1,a( 0 ), n+1 ) + + call stdlib_zlauum( 'U', k, a( k ), n+1, info ) + end if + else + ! n is even and transr = 'c' + if( lower ) then + ! srpa for lower, transpose, and n is even (see paper) + ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1), + ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k + call stdlib_zlauum( 'U', k, a( k ), k, info ) + call stdlib_zherk( 'U', 'N', k, k, one, a( k*( k+1 ) ), k, one,a( k ), k ) + + call stdlib_ztrmm( 'R', 'L', 'N', 'N', k, k, cone, a( 0 ), k,a( k*( k+1 ) ), & + k ) + call stdlib_zlauum( 'L', k, a( 0 ), k, info ) + else + ! srpa for upper, transpose, and n is even (see paper) + ! t1 -> b(0,k+1), t2 -> b(0,k), s -> b(0,0), + ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k + call stdlib_zlauum( 'U', k, a( k*( k+1 ) ), k, info ) + call stdlib_zherk( 'U', 'C', k, k, one, a( 0 ), k, one,a( k*( k+1 ) ), k ) + + call stdlib_ztrmm( 'L', 'L', 'C', 'N', k, k, cone, a( k*k ), k,a( 0 ), k ) + + call stdlib_zlauum( 'L', k, a( k*k ), k, info ) + end if + end if + end if + return + end subroutine stdlib_zpftri + + !> ZPOSV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> The Cholesky decomposition is used to factor A as + !> A = U**H* U, if UPLO = 'U', or + !> A = L * L**H, if UPLO = 'L', + !> where U is an upper triangular matrix and L is a lower triangular + !> matrix. The factored form of A is then used to solve the system of + !> equations A * X = B. + + pure subroutine stdlib_zposv( uplo, n, nrhs, a, lda, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZPOSVX: uses the Cholesky factorization A = U**H*U or A = L*L**H to + !> compute the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian positive definite matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zposvx( fact, uplo, n, nrhs, a, lda, af, ldaf, equed,s, b, ldb, x, ldx, & + rcond, ferr, berr, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(inout) :: s(*) + complex(dp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: equil, nofact, rcequ + integer(ilp) :: i, infequ, j + real(dp) :: amax, anorm, bignum, scond, smax, smin, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + if( nofact .or. equil ) then + equed = 'N' + rcequ = .false. + else + rcequ = stdlib_lsame( equed, 'Y' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + scond = max( smin, smlnum ) / min( smax, bignum ) + else + scond = one + end if + end if + if( info==0 ) then + if( ldb0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanhe( '1', uplo, n, a, lda, rwork ) + ! compute the reciprocal of the condition number of a. + call stdlib_zpocon( uplo, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zpotrs( uplo, n, nrhs, af, ldaf, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_zporfs( uplo, n, nrhs, a, lda, af, ldaf, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( rcequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = s( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / scond + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZPTRFS: improves the computed solution to a system of linear + !> equations when the coefficient matrix is Hermitian positive definite + !> and tridiagonal, and provides error bounds and backward error + !> estimates for the solution. + + pure subroutine stdlib_zptrfs( uplo, n, nrhs, d, e, df, ef, b, ldb, x, ldx,ferr, berr, work, & + rwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(in) :: d(*), df(*) + complex(dp), intent(in) :: b(ldb,*), e(*), ef(*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: x(ldx,*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: itmax = 5 + + + + + + ! Local Scalars + logical(lk) :: upper + integer(ilp) :: count, i, ix, j, nz + real(dp) :: eps, lstres, s, safe1, safe2, safmin + complex(dp) :: bi, cx, dx, ex, zdum + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,max + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldbsafe2 ) then + s = max( s, cabs1( work( i ) ) / rwork( i ) ) + else + s = max( s, ( cabs1( work( i ) )+safe1 ) /( rwork( i )+safe1 ) ) + end if + end do + berr( j ) = s + ! test stopping criterion. continue iterating if + ! 1) the residual berr(j) is larger than machine epsilon, and + ! 2) berr(j) decreased by at least a factor of 2 during the + ! last iteration, and + ! 3) at most itmax iterations tried. + if( berr( j )>eps .and. two*berr( j )<=lstres .and.count<=itmax ) then + ! update solution and try again. + call stdlib_zpttrs( uplo, n, 1, df, ef, work, n, info ) + call stdlib_zaxpy( n, cmplx( one,KIND=dp), work, 1, x( 1, j ), 1 ) + lstres = berr( j ) + count = count + 1 + go to 20 + end if + ! bound error from formula + ! norm(x - xtrue) / norm(x) .le. ferr = + ! norm( abs(inv(a))* + ! ( abs(r) + nz*eps*( abs(a)*abs(x)+abs(b) ))) / norm(x) + ! where + ! norm(z) is the magnitude of the largest component of z + ! inv(a) is the inverse of a + ! abs(z) is the componentwise absolute value of the matrix or + ! vector z + ! nz is the maximum number of nonzeros in any row of a, plus 1 + ! eps is machine epsilon + ! the i-th component of abs(r)+nz*eps*(abs(a)*abs(x)+abs(b)) + ! is incremented by safe1 if the i-th component of + ! abs(a)*abs(x) + abs(b) is less than safe2. + do i = 1, n + if( rwork( i )>safe2 ) then + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) + else + rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +safe1 + end if + end do + ix = stdlib_idamax( n, rwork, 1 ) + ferr( j ) = rwork( ix ) + ! estimate the norm of inv(a). + ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by + ! m(i,j) = abs(a(i,j)), i = j, + ! m(i,j) = -abs(a(i,j)), i .ne. j, + ! and e = [ 1, 1, ..., 1 ]**t. note m(a) = m(l)*d*m(l)**h. + ! solve m(l) * x = e. + rwork( 1 ) = one + do i = 2, n + rwork( i ) = one + rwork( i-1 )*abs( ef( i-1 ) ) + end do + ! solve d * m(l)**h * x = b. + rwork( n ) = rwork( n ) / df( n ) + do i = n - 1, 1, -1 + rwork( i ) = rwork( i ) / df( i ) +rwork( i+1 )*abs( ef( i ) ) + end do + ! compute norm(inv(a)) = max(x(i)), 1<=i<=n. + ix = stdlib_idamax( n, rwork, 1 ) + ferr( j ) = ferr( j )*abs( rwork( ix ) ) + ! normalize error. + lstres = zero + do i = 1, n + lstres = max( lstres, abs( x( i, j ) ) ) + end do + if( lstres/=zero )ferr( j ) = ferr( j ) / lstres + end do loop_100 + return + end subroutine stdlib_zptrfs + + !> ZPTSV: computes the solution to a complex system of linear equations + !> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal + !> matrix, and X and B are N-by-NRHS matrices. + !> A is factored as A = L*D*L**H, and the factored form of A is then + !> used to solve the system of equations. + + pure subroutine stdlib_zptsv( n, nrhs, d, e, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, n, nrhs + ! Array Arguments + real(dp), intent(inout) :: d(*) + complex(dp), intent(inout) :: b(ldb,*), e(*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldb ZPTSVX: uses the factorization A = L*D*L**H to compute the solution + !> to a complex system of linear equations A*X = B, where A is an + !> N-by-N Hermitian positive definite tridiagonal matrix and X and B + !> are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + pure subroutine stdlib_zptsvx( fact, n, nrhs, d, e, df, ef, b, ldb, x, ldx,rcond, ferr, berr,& + work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: fact + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(in) :: d(*) + real(dp), intent(inout) :: df(*) + complex(dp), intent(in) :: b(ldb,*), e(*) + complex(dp), intent(inout) :: ef(*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: nofact + real(dp) :: anorm + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + if( .not.nofact .and. .not.stdlib_lsame( fact, 'F' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldb1 )call stdlib_zcopy( n-1, e, 1, ef, 1 ) + call stdlib_zpttrf( n, df, ef, info ) + ! return if info is non-zero. + if( info>0 )then + rcond = zero + return + end if + end if + ! compute the norm of the matrix a. + anorm = stdlib_zlanht( '1', n, d, e ) + ! compute the reciprocal of the condition number of a. + call stdlib_zptcon( n, df, ef, anorm, rcond, rwork, info ) + ! compute the solution vectors x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zpttrs( 'LOWER', n, nrhs, df, ef, x, ldx, info ) + ! use iterative refinement to improve the computed solutions and + ! compute error bounds and backward error estimates for them. + call stdlib_zptrfs( 'LOWER', n, nrhs, d, e, df, ef, b, ldb, x, ldx, ferr,berr, work, & + rwork, info ) + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZSTEDC: computes all eigenvalues and, optionally, eigenvectors of a + !> symmetric tridiagonal matrix using the divide and conquer method. + !> The eigenvectors of a full or band complex Hermitian matrix can also + !> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + !> matrix to tridiagonal form. + !> This code makes very mild assumptions about floating point + !> arithmetic. It will work on machines with a guard digit in + !> add/subtract, or on those binary machines without guard digits + !> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + !> It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. See DLAED3 for details. + + pure subroutine stdlib_zstedc( compz, n, d, e, z, ldz, work, lwork, rwork,lrwork, iwork, & + liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(out) :: work(*) + complex(dp), intent(inout) :: z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: finish, i, icompz, ii, j, k, lgn, liwmin, ll, lrwmin, lwmin, m, smlsiz,& + start + real(dp) :: eps, orgnrm, p, tiny + ! Intrinsic Functions + intrinsic :: abs,real,int,log,max,mod,sqrt + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + if( stdlib_lsame( compz, 'N' ) ) then + icompz = 0 + else if( stdlib_lsame( compz, 'V' ) ) then + icompz = 1 + else if( stdlib_lsame( compz, 'I' ) ) then + icompz = 2 + else + icompz = -1 + end if + if( icompz<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( ( ldz<1 ) .or.( icompz>0 .and. ldztiny ) then + finish = finish + 1 + go to 40 + end if + end if + ! (sub) problem determined. compute its size and solve it. + m = finish - start + 1 + if( m>smlsiz ) then + ! scale. + orgnrm = stdlib_dlanst( 'M', m, d( start ), e( start ) ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, m, 1, d( start ), m,info ) + call stdlib_dlascl( 'G', 0, 0, orgnrm, one, m-1, 1, e( start ),m-1, info ) + + call stdlib_zlaed0( n, m, d( start ), e( start ), z( 1, start ),ldz, work, n, & + rwork, iwork, info ) + if( info>0 ) then + info = ( info / ( m+1 )+start-1 )*( n+1 ) +mod( info, ( m+1 ) ) + start - & + 1 + go to 70 + end if + ! scale back. + call stdlib_dlascl( 'G', 0, 0, one, orgnrm, m, 1, d( start ), m,info ) + else + call stdlib_dsteqr( 'I', m, d( start ), e( start ), rwork, m,rwork( m*m+1 ), & + info ) + call stdlib_zlacrm( n, m, z( 1, start ), ldz, rwork, m, work, n,rwork( m*m+1 )& + ) + call stdlib_zlacpy( 'A', n, m, work, n, z( 1, start ), ldz ) + if( info>0 ) then + info = start*( n+1 ) + finish + go to 70 + end if + end if + start = finish + 1 + go to 30 + end if + ! endwhile + ! use selection sort to minimize swaps of eigenvectors + do ii = 2, n + i = ii - 1 + k = i + p = d( i ) + do j = ii, n + if( d( j )

ZSTEGR: computes selected eigenvalues and, optionally, eigenvectors + !> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has + !> a well defined set of pairwise different real eigenvalues, the corresponding + !> real eigenvectors are pairwise orthogonal. + !> The spectrum may be computed either completely or partially by specifying + !> either an interval (VL,VU] or a range of indices IL:IU for the desired + !> eigenvalues. + !> ZSTEGR is a compatibility wrapper around the improved ZSTEMR routine. + !> See ZSTEMR for further details. + !> One important change is that the ABSTOL parameter no longer provides any + !> benefit and hence is no longer used. + !> Note : ZSTEGR and ZSTEMR work only on machines which follow + !> IEEE-754 floating-point standard in their handling of infinities and + !> NaNs. Normal execution may create these exceptiona values and hence + !> may abort due to a floating point exception in environments which + !> do not conform to the IEEE-754 standard. + + pure subroutine stdlib_zstegr( jobz, range, n, d, e, vl, vu, il, iu,abstol, m, w, z, ldz, & + isuppz, work, lwork, iwork,liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range + integer(ilp), intent(in) :: il, iu, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: isuppz(*), iwork(*) + real(dp), intent(inout) :: d(*), e(*) + real(dp), intent(out) :: w(*), work(*) + complex(dp), intent(out) :: z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: tryrac + ! Executable Statements + info = 0 + tryrac = .false. + call stdlib_zstemr( jobz, range, n, d, e, vl, vu, il, iu,m, w, z, ldz, n, isuppz, & + tryrac, work, lwork,iwork, liwork, info ) + end subroutine stdlib_zstegr + + !> ZTGSEN: reorders the generalized Schur decomposition of a complex + !> matrix pair (A, B) (in terms of an unitary equivalence trans- + !> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues + !> appears in the leading diagonal blocks of the pair (A,B). The leading + !> columns of Q and Z form unitary bases of the corresponding left and + !> right eigenspaces (deflating subspaces). (A, B) must be in + !> generalized Schur canonical form, that is, A and B are both upper + !> triangular. + !> ZTGSEN also computes the generalized eigenvalues + !> w(j)= ALPHA(j) / BETA(j) + !> of the reordered matrix pair (A, B). + !> Optionally, the routine computes estimates of reciprocal condition + !> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), + !> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) + !> between the matrix pairs (A11, B11) and (A22,B22) that correspond to + !> the selected cluster and the eigenvalues outside the cluster, resp., + !> and norms of "projections" onto left and right eigenspaces w.r.t. + !> the selected cluster in the (1,1)-block. + + pure subroutine stdlib_ztgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, & + ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + logical(lk), intent(in) :: wantq, wantz + integer(ilp), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n + integer(ilp), intent(out) :: info, m + real(dp), intent(out) :: pl, pr + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: dif(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) + complex(dp), intent(out) :: alpha(*), beta(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp + integer(ilp) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2 + real(dp) :: dscale, dsum, rdscal, safmin + complex(dp) :: temp1, temp2 + ! Local Arrays + integer(ilp) :: isave(3) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,max,sqrt + ! Executable Statements + ! decode and test the input parameters + info = 0 + lquery = ( lwork==-1 .or. liwork==-1 ) + if( ijob<0 .or. ijob>5 ) then + info = -1 + else if( n<0 ) then + info = -5 + else if( lda=4 + wantd1 = ijob==2 .or. ijob==4 + wantd2 = ijob==3 .or. ijob==5 + wantd = wantd1 .or. wantd2 + ! set m to the dimension of the specified pair of deflating + ! subspaces. + m = 0 + if( .not.lquery .or. ijob/=0 ) then + do k = 1, n + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + if( k0 ) then + ! swap is rejected: exit. + info = 1 + if( wantp ) then + pl = zero + pr = zero + end if + if( wantd ) then + dif( 1 ) = zero + dif( 2 ) = zero + end if + go to 70 + end if + end if + end do + if( wantp ) then + ! solve generalized sylvester equation for r and l: + ! a11 * r - l * a22 = a12 + ! b11 * r - l * b22 = b12 + n1 = m + n2 = n - m + i = n1 + 1 + call stdlib_zlacpy( 'FULL', n1, n2, a( 1, i ), lda, work, n1 ) + call stdlib_zlacpy( 'FULL', n1, n2, b( 1, i ), ldb, work( n1*n2+1 ),n1 ) + ijb = 0 + call stdlib_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,& + i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-2*n1*n2, & + iwork, ierr ) + ! estimate the reciprocal of norms of "projections" onto + ! left and right eigenspaces + rdscal = zero + dsum = one + call stdlib_zlassq( n1*n2, work, 1, rdscal, dsum ) + pl = rdscal*sqrt( dsum ) + if( pl==zero ) then + pl = one + else + pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) ) + end if + rdscal = zero + dsum = one + call stdlib_zlassq( n1*n2, work( n1*n2+1 ), 1, rdscal, dsum ) + pr = rdscal*sqrt( dsum ) + if( pr==zero ) then + pr = one + else + pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) ) + end if + end if + if( wantd ) then + ! compute estimates difu and difl. + if( wantd1 ) then + n1 = m + n2 = n - m + i = n1 + 1 + ijb = idifjb + ! frobenius norm-based difu estimate. + call stdlib_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(& + i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + ! frobenius norm-based difl estimate. + call stdlib_ztgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),& + ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2 ), work( n1*n2*2+1 ),lwork-& + 2*n1*n2, iwork, ierr ) + else + ! compute 1-norm-based estimates of difu and difl using + ! reversed communication with stdlib_zlacn2. in each step a + ! generalized sylvester equation or a transposed variant + ! is solved. + kase = 0 + n1 = m + n2 = n - m + i = n1 + 1 + ijb = 0 + mn2 = 2*n1*n2 + ! 1-norm-based estimate of difu. + 40 continue + call stdlib_zlacn2( mn2, work( mn2+1 ), work, dif( 1 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_ztgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 40 + end if + dif( 1 ) = dscale / dif( 1 ) + ! 1-norm-based estimate of difl. + 50 continue + call stdlib_zlacn2( mn2, work( mn2+1 ), work, dif( 2 ), kase,isave ) + if( kase/=0 ) then + if( kase==1 ) then + ! solve generalized sylvester equation + call stdlib_ztgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( & + i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + else + ! solve the transposed variant. + call stdlib_ztgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, & + ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2 ),work( n1*n2*2+1 )& + , lwork-2*n1*n2, iwork,ierr ) + end if + go to 50 + end if + dif( 2 ) = dscale / dif( 2 ) + end if + end if + ! if b(k,k) is complex, make it real and positive (normalization + ! of the generalized schur form) and store the generalized + ! eigenvalues of reordered pair (a, b) + do k = 1, n + dscale = abs( b( k, k ) ) + if( dscale>safmin ) then + temp1 = conjg( b( k, k ) / dscale ) + temp2 = b( k, k ) / dscale + b( k, k ) = dscale + call stdlib_zscal( n-k, temp1, b( k, k+1 ), ldb ) + call stdlib_zscal( n-k+1, temp1, a( k, k ), lda ) + if( wantq )call stdlib_zscal( n, temp2, q( 1, k ), 1 ) + else + b( k, k ) = cmplx( zero, zero,KIND=dp) + end if + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + end do + 70 continue + work( 1 ) = lwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_ztgsen + + !> ZTGSNA: estimates reciprocal condition numbers for specified + !> eigenvalues and/or eigenvectors of a matrix pair (A, B). + !> (A, B) must be in generalized Schur canonical form, that is, A and + !> B are both upper triangular. + + pure subroutine stdlib_ztgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, & + dif, mm, m, work, lwork,iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: howmny, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n + ! Array Arguments + logical(lk), intent(in) :: select(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: dif(*), s(*) + complex(dp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: idifjb = 3 + + + ! Local Scalars + logical(lk) :: lquery, somcon, wantbh, wantdf, wants + integer(ilp) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2 + real(dp) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum + complex(dp) :: yhax, yhbx + ! Local Arrays + complex(dp) :: dummy(1), dummy1(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,max + ! Executable Statements + ! decode and test the input parameters + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantdf = stdlib_lsame( job, 'V' ) .or. wantbh + somcon = stdlib_lsame( howmny, 'S' ) + info = 0 + lquery = ( lwork==-1 ) + if( .not.wants .and. .not.wantdf ) then + info = -1 + else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( lda0 ) then + ! ill-conditioned problem - swap rejected. + dif( ks ) = zero + else + ! reordering successful, solve generalized sylvester + ! equation for r and l, + ! a22 * r - l * a11 = a12 + ! b22 * r - l * b11 = b12, + ! and compute estimate of difl[(a11,b11), (a22, b22)]. + n1 = 1 + n2 = n - n1 + i = n*n + 1 + call stdlib_ztgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, & + work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, & + dif( ks ), dummy,1, iwork, ierr ) + end if + end if + end if + end do loop_20 + work( 1 ) = lwmin + return + end subroutine stdlib_ztgsna + + !> ZTRSEN: reorders the Schur factorization of a complex matrix + !> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in + !> the leading positions on the diagonal of the upper triangular matrix + !> T, and the leading columns of Q form an orthonormal basis of the + !> corresponding right invariant subspace. + !> Optionally the routine computes the reciprocal condition numbers of + !> the cluster of eigenvalues and/or the invariant subspace. + + subroutine stdlib_ztrsen( job, compq, select, n, t, ldt, q, ldq, w, m, s,sep, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: compq, job + integer(ilp), intent(out) :: info, m + integer(ilp), intent(in) :: ldq, ldt, lwork, n + real(dp), intent(out) :: s, sep + ! Array Arguments + logical(lk), intent(in) :: select(*) + complex(dp), intent(inout) :: q(ldq,*), t(ldt,*) + complex(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantbh, wantq, wants, wantsp + integer(ilp) :: ierr, k, kase, ks, lwmin, n1, n2, nn + real(dp) :: est, rnorm, scale + ! Local Arrays + integer(ilp) :: isave(3) + real(dp) :: rwork(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode and test the input parameters. + wantbh = stdlib_lsame( job, 'B' ) + wants = stdlib_lsame( job, 'E' ) .or. wantbh + wantsp = stdlib_lsame( job, 'V' ) .or. wantbh + wantq = stdlib_lsame( compq, 'V' ) + ! set m to the number of selected eigenvalues. + m = 0 + do k = 1, n + if( select( k ) )m = m + 1 + end do + n1 = m + n2 = n - m + nn = n1*n2 + info = 0 + lquery = ( lwork==-1 ) + if( wantsp ) then + lwmin = max( 1, 2*nn ) + else if( stdlib_lsame( job, 'N' ) ) then + lwmin = 1 + else if( stdlib_lsame( job, 'E' ) ) then + lwmin = max( 1, nn ) + end if + if( .not.stdlib_lsame( job, 'N' ) .and. .not.wants .and. .not.wantsp )then + info = -1 + else if( .not.stdlib_lsame( compq, 'N' ) .and. .not.wantq ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldt ZUNBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, + !> M-P, or M-Q. Routines ZUNBDB2, ZUNBDB3, and ZUNBDB4 handle cases in + !> which Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < q .or. m-p < q ) then + info = -2 + else if( q < 0 .or. m-q < q ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-2 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB1', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., q of x11 and x21 + do i = 1, q + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + theta(i) = atan2( real( x21(i,i),KIND=dp), real( x11(i,i),KIND=dp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i) = cone + x21(i,i) = cone + call stdlib_zlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + call stdlib_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + if( i < q ) then + call stdlib_zdrot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) + call stdlib_zlacgv( q-i, x21(i,i+1), ldx21 ) + call stdlib_zlarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) + s = real( x21(i,i+1),KIND=dp) + x21(i,i+1) = cone + call stdlib_zlarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & + ldx11, work(ilarf) ) + call stdlib_zlarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & + ldx21, work(ilarf) ) + call stdlib_zlacgv( q-i, x21(i,i+1), ldx21 ) + c = sqrt( stdlib_dznrm2( p-i, x11(i+1,i+1), 1 )**2+ stdlib_dznrm2( m-p-i, x21(i+& + 1,i+1), 1 )**2 ) + phi(i) = atan2( s, c ) + call stdlib_zunbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1,x21(i+1,i+1), 1, x11(i+1,& + i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) + end if + end do + return + end subroutine stdlib_zunbdb1 + + !> ZUNBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, + !> Q, or M-Q. Routines ZUNBDB1, ZUNBDB3, and ZUNBDB4 handle cases in + !> which P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by + !> angles THETA, PHI. + + subroutine stdlib_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < 0 .or. p > m-p ) then + info = -2 + else if( q < 0 .or. q < p .or. m-q < p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p-1, m-p, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB2', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., p of x11 and x21 + do i = 1, p + if( i > 1 ) then + call stdlib_zdrot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) + end if + call stdlib_zlacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_zlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + c = real( x11(i,i),KIND=dp) + x11(i,i) = cone + call stdlib_zlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_zlarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & + work(ilarf) ) + call stdlib_zlacgv( q-i+1, x11(i,i), ldx11 ) + s = sqrt( stdlib_dznrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_dznrm2( m-p-i+1, x21(i,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_zunbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1, x21(i,i), 1,x11(i+1,i+1), & + ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_zscal( p-i, cnegone, x11(i+1,i), 1 ) + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + if( i < p ) then + call stdlib_zlarfgp( p-i, x11(i+1,i), x11(i+2,i), 1, taup1(i) ) + phi(i) = atan2( real( x11(i+1,i),KIND=dp), real( x21(i,i),KIND=dp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x11(i+1,i) = cone + call stdlib_zlarf( 'L', p-i, q-i, x11(i+1,i), 1, conjg(taup1(i)),x11(i+1,i+1), & + ldx11, work(ilarf) ) + end if + x21(i,i) = cone + call stdlib_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + ! reduce the bottom-right portion of x21 to the identity matrix + do i = p + 1, q + call stdlib_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1, taup2(i) ) + x21(i,i) = cone + call stdlib_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1, conjg(taup2(i)),x21(i,i+1), & + ldx21, work(ilarf) ) + end do + return + end subroutine stdlib_zunbdb2 + + !> ZUNBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, + !> Q, or M-Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB4 handle cases in + !> which M-P is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( 2*p < m .or. p > m ) then + info = -2 + else if( q < m-p .or. m-q < m-p ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( p, m-p-1, q-1 ) + iorbdb5 = 2 + lorbdb5 = q-1 + lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB3', -info ) + return + else if( lquery ) then + return + end if + ! reduce rows 1, ..., m-p of x11 and x21 + do i = 1, m-p + if( i > 1 ) then + call stdlib_zdrot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) + end if + call stdlib_zlacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_zlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + s = real( x21(i,i),KIND=dp) + x21(i,i) = cone + call stdlib_zlarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & + work(ilarf) ) + call stdlib_zlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_zlacgv( q-i+1, x21(i,i), ldx21 ) + c = sqrt( stdlib_dznrm2( p-i+1, x11(i,i), 1 )**2+ stdlib_dznrm2( m-p-i, x21(i+1,i), & + 1 )**2 ) + theta(i) = atan2( s, c ) + call stdlib_zunbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1, x21(i+1,i), 1,x11(i,i+1), & + ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + if( i < m-p ) then + call stdlib_zlarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1, taup2(i) ) + phi(i) = atan2( real( x21(i+1,i),KIND=dp), real( x11(i,i),KIND=dp) ) + c = cos( phi(i) ) + s = sin( phi(i) ) + x21(i+1,i) = cone + call stdlib_zlarf( 'L', m-p-i, q-i, x21(i+1,i), 1,conjg(taup2(i)), x21(i+1,i+1), & + ldx21,work(ilarf) ) + end if + x11(i,i) = cone + call stdlib_zlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + ! reduce the bottom-right portion of x11 to the identity matrix + do i = m-p + 1, q + call stdlib_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1, taup1(i) ) + x11(i,i) = cone + call stdlib_zlarf( 'L', p-i+1, q-i, x11(i,i), 1, conjg(taup1(i)),x11(i,i+1), ldx11, & + work(ilarf) ) + end do + return + end subroutine stdlib_zunbdb3 + + !> ZUNBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny + !> matrix X with orthonomal columns: + !> [ B11 ] + !> [ X11 ] [ P1 | ] [ 0 ] + !> [-----] = [---------] [-----] Q1**T . + !> [ X21 ] [ | P2 ] [ B21 ] + !> [ 0 ] + !> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, + !> M-P, or Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB3 handle cases in + !> which M-Q is not the minimum dimension. + !> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), + !> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by + !> Householder vectors. + !> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented + !> implicitly by angles THETA, PHI. + + subroutine stdlib_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & + phantom, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lwork, m, p, q, ldx11, ldx21 + ! Array Arguments + real(dp), intent(out) :: phi(*), theta(*) + complex(dp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + ! ==================================================================== + + ! Local Scalars + real(dp) :: c, s + integer(ilp) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & + lworkopt + logical(lk) :: lquery + ! Intrinsic Function + intrinsic :: atan2,cos,max,sin,sqrt + ! Executable Statements + ! test input arguments + info = 0 + lquery = lwork == -1 + if( m < 0 ) then + info = -1 + else if( p < m-q .or. m-p < m-q ) then + info = -2 + else if( q < m-q .or. q > m ) then + info = -3 + else if( ldx11 < max( 1, p ) ) then + info = -5 + else if( ldx21 < max( 1, m-p ) ) then + info = -7 + end if + ! compute workspace + if( info == 0 ) then + ilarf = 2 + llarf = max( q-1, p-1, m-p-1 ) + iorbdb5 = 2 + lorbdb5 = q + lworkopt = ilarf + llarf - 1 + lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1 ) + lworkmin = lworkopt + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -14 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNBDB4', -info ) + return + else if( lquery ) then + return + end if + ! reduce columns 1, ..., m-q of x11 and x21 + do i = 1, m-q + if( i == 1 ) then + do j = 1, m + phantom(j) = czero + end do + call stdlib_zunbdb5( p, m-p, q, phantom(1), 1, phantom(p+1), 1,x11, ldx11, x21, & + ldx21, work(iorbdb5),lorbdb5, childinfo ) + call stdlib_zscal( p, cnegone, phantom(1), 1 ) + call stdlib_zlarfgp( p, phantom(1), phantom(2), 1, taup1(1) ) + call stdlib_zlarfgp( m-p, phantom(p+1), phantom(p+2), 1, taup2(1) ) + theta(i) = atan2( real( phantom(1),KIND=dp), real( phantom(p+1),KIND=dp) ) + + c = cos( theta(i) ) + s = sin( theta(i) ) + phantom(1) = cone + phantom(p+1) = cone + call stdlib_zlarf( 'L', p, q, phantom(1), 1, conjg(taup1(1)), x11,ldx11, work(& + ilarf) ) + call stdlib_zlarf( 'L', m-p, q, phantom(p+1), 1, conjg(taup2(1)),x21, ldx21, & + work(ilarf) ) + else + call stdlib_zunbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1,x21(i,i-1), 1, x11(i,i)& + , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) + call stdlib_zscal( p-i+1, cnegone, x11(i,i-1), 1 ) + call stdlib_zlarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1, taup1(i) ) + call stdlib_zlarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1,taup2(i) ) + theta(i) = atan2( real( x11(i,i-1),KIND=dp), real( x21(i,i-1),KIND=dp) ) + c = cos( theta(i) ) + s = sin( theta(i) ) + x11(i,i-1) = cone + x21(i,i-1) = cone + call stdlib_zlarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1,conjg(taup1(i)), x11(i,i), & + ldx11, work(ilarf) ) + call stdlib_zlarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1,conjg(taup2(i)), x21(i,i), & + ldx21, work(ilarf) ) + end if + call stdlib_zdrot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) + call stdlib_zlacgv( q-i+1, x21(i,i), ldx21 ) + call stdlib_zlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) + c = real( x21(i,i),KIND=dp) + x21(i,i) = cone + call stdlib_zlarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_zlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & + work(ilarf) ) + call stdlib_zlacgv( q-i+1, x21(i,i), ldx21 ) + if( i < m-q ) then + s = sqrt( stdlib_dznrm2( p-i, x11(i+1,i), 1 )**2+ stdlib_dznrm2( m-p-i, x21(i+1,& + i), 1 )**2 ) + phi(i) = atan2( s, c ) + end if + end do + ! reduce the bottom-right portion of x11 to [ i 0 ] + do i = m - q + 1, p + call stdlib_zlacgv( q-i+1, x11(i,i), ldx11 ) + call stdlib_zlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) + x11(i,i) = cone + call stdlib_zlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & + work(ilarf) ) + call stdlib_zlarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & + work(ilarf) ) + call stdlib_zlacgv( q-i+1, x11(i,i), ldx11 ) + end do + ! reduce the bottom-right portion of x21 to [ 0 i ] + do i = p + 1, q + call stdlib_zlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + call stdlib_zlarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) + + x21(m-q+i-p,i) = cone + call stdlib_zlarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& + , ldx21, work(ilarf) ) + call stdlib_zlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) + end do + return + end subroutine stdlib_zunbdb4 + + !> ZUNCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with + !> orthonormal columns that has been partitioned into a 2-by-1 block + !> structure: + !> [ I1 0 0 ] + !> [ 0 C 0 ] + !> [ X11 ] [ U1 | ] [ 0 0 0 ] + !> X = [-----] = [---------] [----------] V1**T . + !> [ X21 ] [ | U2 ] [ 0 0 0 ] + !> [ 0 S 0 ] + !> [ 0 0 I2] + !> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, + !> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R + !> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which + !> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a + !> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). + + subroutine stdlib_zuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & + u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu1, jobu2, jobv1t + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q + integer(ilp), intent(in) :: lrwork + integer(ilp) :: lrworkmin, lrworkopt + ! Array Arguments + real(dp), intent(out) :: rwork(*) + real(dp), intent(out) :: theta(*) + complex(dp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) + complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & + ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & + lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & + r + logical(lk) :: lquery, wantu1, wantu2, wantv1t + ! Local Arrays + real(dp) :: dum(1) + complex(dp) :: cdum(1,1) + ! Intrinsic Function + intrinsic :: int,max,min + ! Executable Statements + ! test input arguments + info = 0 + wantu1 = stdlib_lsame( jobu1, 'Y' ) + wantu2 = stdlib_lsame( jobu2, 'Y' ) + wantv1t = stdlib_lsame( jobv1t, 'Y' ) + lquery = ( lwork==-1 ) .or. ( lrwork==-1 ) + if( m < 0 ) then + info = -4 + else if( p < 0 .or. p > m ) then + info = -5 + else if( q < 0 .or. q > m ) then + info = -6 + else if( ldx11 < max( 1, p ) ) then + info = -8 + else if( ldx21 < max( 1, m-p ) ) then + info = -10 + else if( wantu1 .and. ldu1 < max( 1, p ) ) then + info = -13 + else if( wantu2 .and. ldu2 < max( 1, m - p ) ) then + info = -15 + else if( wantv1t .and. ldv1t < max( 1, q ) ) then + info = -17 + end if + r = min( p, m-p, q, m-q ) + ! compute workspace + ! work layout: + ! |-----------------------------------------| + ! | lworkopt (1) | + ! |-----------------------------------------| + ! | taup1 (max(1,p)) | + ! | taup2 (max(1,m-p)) | + ! | tauq1 (max(1,q)) | + ! |-----------------------------------------| + ! | stdlib_zunbdb work | stdlib_zungqr work | stdlib_zunglq work | + ! | | | | + ! | | | | + ! | | | | + ! | | | | + ! |-----------------------------------------| + ! rwork layout: + ! |------------------| + ! | lrworkopt (1) | + ! |------------------| + ! | phi (max(1,r-1)) | + ! |------------------| + ! | b11d (r) | + ! | b11e (r-1) | + ! | b12d (r) | + ! | b12e (r-1) | + ! | b21d (r) | + ! | b21e (r-1) | + ! | b22d (r) | + ! | b22e (r-1) | + ! | stdlib_zbbcsd rwork | + ! |------------------| + if( info == 0 ) then + iphi = 2 + ib11d = iphi + max( 1, r-1 ) + ib11e = ib11d + max( 1, r ) + ib12d = ib11e + max( 1, r - 1 ) + ib12e = ib12d + max( 1, r ) + ib21d = ib12e + max( 1, r - 1 ) + ib21e = ib21d + max( 1, r ) + ib22d = ib21e + max( 1, r - 1 ) + ib22e = ib22d + max( 1, r ) + ibbcsd = ib22e + max( 1, r - 1 ) + itaup1 = 2 + itaup2 = itaup1 + max( 1, p ) + itauq1 = itaup2 + max( 1, m-p ) + iorbdb = itauq1 + max( 1, q ) + iorgqr = itauq1 + max( 1, q ) + iorglq = itauq1 + max( 1, q ) + lorgqrmin = 1 + lorgqropt = 1 + lorglqmin = 1 + lorglqopt = 1 + if( r == q ) then + call stdlib_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work, -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_zungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + endif + if( wantu2 .and. m-p > 0 ) then + call stdlib_zungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zunglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1), -1, childinfo ) + + lorglqmin = max( lorglqmin, q-1 ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_zbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum, u1, ldu1,& + u2, ldu2, v1t, ldv1t, cdum, 1,dum, dum, dum, dum, dum, dum, dum, dum,rwork(1), -& + 1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == p ) then + call stdlib_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_zungqr( p-1, p-1, p-1, u1(2,2), ldu1, cdum, work(1),-1, childinfo & + ) + lorgqrmin = max( lorgqrmin, p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zungqr( m-p, m-p, q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_zbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & + ldv1t, cdum, 1, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else if( r == m-p ) then + call stdlib_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, work(1), -1, childinfo ) + lorbdb = int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_zungqr( p, p, q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2, cdum,work(1), -1, & + childinfo ) + lorgqrmin = max( lorgqrmin, m-p-1 ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zunglq( q, q, r, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_zbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& + 1, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + else + call stdlib_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & + cdum, cdum, work(1), -1, childinfo) + lorbdb = m + int( work(1),KIND=ilp) + if( wantu1 .and. p > 0 ) then + call stdlib_zungqr( p, p, m-q, u1, ldu1, cdum, work(1), -1,childinfo ) + lorgqrmin = max( lorgqrmin, p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1), -1,childinfo ) + + lorgqrmin = max( lorgqrmin, m-p ) + lorgqropt = max( lorgqropt, int( work(1),KIND=ilp) ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zunglq( q, q, q, v1t, ldv1t, cdum, work(1), -1,childinfo ) + lorglqmin = max( lorglqmin, q ) + lorglqopt = max( lorglqopt, int( work(1),KIND=ilp) ) + end if + call stdlib_zbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & + ldu2, u1, ldu1, cdum, 1, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& + 1), -1, childinfo ) + lbbcsd = int( rwork(1),KIND=ilp) + end if + lrworkmin = ibbcsd+lbbcsd-1 + lrworkopt = lrworkmin + rwork(1) = lrworkopt + lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) + lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) + work(1) = lworkopt + if( lwork < lworkmin .and. .not.lquery ) then + info = -19 + end if + if( lrwork < lrworkmin .and. .not.lquery ) then + info = -21 + end if + end if + if( info /= 0 ) then + call stdlib_xerbla( 'ZUNCSD2BY1', -info ) + return + else if( lquery ) then + return + end if + lorgqr = lwork-iorgqr+1 + lorglq = lwork-iorglq+1 + ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, + ! in which r = min(p,m-p,q,m-q) + if( r == q ) then + ! case 1: r = q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + v1t(1,1) = cone + do j = 2, q + v1t(1,j) = czero + v1t(j,1) = czero + end do + call stdlib_zlacpy( 'U', q-1, q-1, x21(1,2), ldx21, v1t(2,2),ldv1t ) + call stdlib_zunglq( q-1, q-1, q-1, v1t(2,2), ldv1t, work(itauq1),work(iorglq), & + lorglq, childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_zbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & + ldu1, u2, ldu2, v1t, ldv1t, cdum,1, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& + ibbcsd+1, childinfo ) + ! permute rows and columns to place czero submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == p ) then + ! case 2: r = p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + u1(1,1) = cone + do j = 2, p + u1(1,j) = czero + u1(j,1) = czero + end do + call stdlib_zlacpy( 'L', p-1, p-1, x11(2,1), ldx11, u1(2,2), ldu1 ) + call stdlib_zungqr( p-1, p-1, p-1, u1(2,2), ldu1, work(itaup1),work(iorgqr), & + lorgqr, childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + call stdlib_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) + call stdlib_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zlacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) + call stdlib_zunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_zbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& + ldv1t, cdum, 1, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& + ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & + lbbcsd,childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > 0 .and. wantu2 ) then + do i = 1, q + iwork(i) = m - p - q + i + end do + do i = q + 1, m - p + iwork(i) = i - q + end do + call stdlib_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) + end if + else if( r == m-p ) then + ! case 3: r = m-p + ! simultaneously bidiagonalize x11 and x21 + call stdlib_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) + ! accumulate householder reflectors + if( wantu1 .and. p > 0 ) then + call stdlib_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) + call stdlib_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + u2(1,1) = cone + do j = 2, m-p + u2(1,j) = czero + u2(j,1) = czero + end do + call stdlib_zlacpy( 'L', m-p-1, m-p-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_zungqr( m-p-1, m-p-1, m-p-1, u2(2,2), ldu2,work(itaup2), work(iorgqr)& + , lorgqr, childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zlacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) + call stdlib_zunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_zbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & + cdum, 1, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( q > r ) then + do i = 1, r + iwork(i) = q - r + i + end do + do i = r + 1, q + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_zlapmt( .false., p, q, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_zlapmr( .false., q, q, v1t, ldv1t, iwork ) + end if + end if + else + ! case 4: r = m-q + ! simultaneously bidiagonalize x11 and x21 + call stdlib_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& + itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & + childinfo ) + ! accumulate householder reflectors + if( wantu2 .and. m-p > 0 ) then + call stdlib_zcopy( m-p, work(iorbdb+p), 1, u2, 1 ) + end if + if( wantu1 .and. p > 0 ) then + call stdlib_zcopy( p, work(iorbdb), 1, u1, 1 ) + do j = 2, p + u1(1,j) = czero + end do + call stdlib_zlacpy( 'L', p-1, m-q-1, x11(2,1), ldx11, u1(2,2),ldu1 ) + call stdlib_zungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantu2 .and. m-p > 0 ) then + do j = 2, m-p + u2(1,j) = czero + end do + call stdlib_zlacpy( 'L', m-p-1, m-q-1, x21(2,1), ldx21, u2(2,2),ldu2 ) + call stdlib_zungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & + childinfo ) + end if + if( wantv1t .and. q > 0 ) then + call stdlib_zlacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) + call stdlib_zlacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& + 1), ldv1t ) + call stdlib_zlacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) + + call stdlib_zunglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & + childinfo ) + end if + ! simultaneously diagonalize x11 and x21. + call stdlib_zbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & + u2, ldu2, u1, ldu1, cdum, 1,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & + rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & + lbbcsd, childinfo ) + ! permute rows and columns to place identity submatrices in + ! preferred positions + if( p > r ) then + do i = 1, r + iwork(i) = p - r + i + end do + do i = r + 1, p + iwork(i) = i - r + end do + if( wantu1 ) then + call stdlib_zlapmt( .false., p, p, u1, ldu1, iwork ) + end if + if( wantv1t ) then + call stdlib_zlapmr( .false., p, q, v1t, ldv1t, iwork ) + end if + end if + end if + return + end subroutine stdlib_zuncsd2by1 + + !> ZUNGBR: generates one of the complex unitary matrices Q or P**H + !> determined by ZGEBRD when reducing a complex matrix A to bidiagonal + !> form: A = Q * B * P**H. Q and P**H are defined as products of + !> elementary reflectors H(i) or G(i) respectively. + !> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + !> is of order M: + !> if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n + !> columns of Q, where m >= n >= k; + !> if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an + !> M-by-M matrix. + !> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + !> is of order N: + !> if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m + !> rows of P**H, where n >= m >= k; + !> if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as + !> an N-by-N matrix. + + pure subroutine stdlib_zungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, wantq + integer(ilp) :: i, iinfo, j, lwkopt, mn + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + wantq = stdlib_lsame( vect, 'Q' ) + mn = min( m, n ) + lquery = ( lwork==-1 ) + if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 .or. ( wantq .and. ( n>m .or. nn .or. m=k ) then + call stdlib_zungqr( m, n, k, a, lda, tau, work, -1, iinfo ) + else + if( m>1 ) then + call stdlib_zungqr( m-1, m-1, m-1, a, lda, tau, work, -1,iinfo ) + end if + end if + else + if( k1 ) then + call stdlib_zunglq( n-1, n-1, n-1, a, lda, tau, work, -1,iinfo ) + end if + end if + end if + lwkopt = real( work( 1 ),KIND=dp) + lwkopt = max (lwkopt, mn) + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZUNGBR', -info ) + return + else if( lquery ) then + work( 1 ) = lwkopt + return + end if + ! quick return if possible + if( m==0 .or. n==0 ) then + work( 1 ) = 1 + return + end if + if( wantq ) then + ! form q, determined by a call to stdlib_zgebrd to reduce an m-by-k + ! matrix + if( m>=k ) then + ! if m >= k, assume m >= n >= k + call stdlib_zungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) + else + ! if m < k, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! column to the right, and set the first row and column of q + ! to those of the unit matrix + do j = m, 2, -1 + a( 1, j ) = czero + do i = j + 1, m + a( i, j ) = a( i, j-1 ) + end do + end do + a( 1, 1 ) = cone + do i = 2, m + a( i, 1 ) = czero + end do + if( m>1 ) then + ! form q(2:m,2:m) + call stdlib_zungqr( m-1, m-1, m-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + else + ! form p**h, determined by a call to stdlib_zgebrd to reduce a k-by-n + ! matrix + if( k= n, assume m = n + ! shift the vectors which define the elementary reflectors cone + ! row downward, and set the first row and column of p**h to + ! those of the unit matrix + a( 1, 1 ) = cone + do i = 2, n + a( i, 1 ) = czero + end do + do j = 2, n + do i = j - 1, 2, -1 + a( i, j ) = a( i-1, j ) + end do + a( 1, j ) = czero + end do + if( n>1 ) then + ! form p**h(2:n,2:n) + call stdlib_zunglq( n-1, n-1, n-1, a( 2, 2 ), lda, tau, work,lwork, iinfo ) + + end if + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zungbr + + !> ZUNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal + !> columns, which are the first N columns of a product of comlpex unitary + !> matrices of order M which are returned by ZLATSQR + !> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). + !> See the documentation for ZLATSQR. + + pure subroutine stdlib_zungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, mb, nb + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: t(ldt,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j + ! Intrinsic Functions + intrinsic :: cmplx,max,min + ! Executable Statements + ! test the input parameters + lquery = lwork==-1 + info = 0 + if( m<0 ) then + info = -1 + else if( n<0 .or. m If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C + !> with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': P * C C * P + !> TRANS = 'C': P**H * C C * P**H + !> Here Q and P**H are the unitary matrices determined by ZGEBRD when + !> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + !> and P**H are defined as products of elementary reflectors H(i) and + !> G(i) respectively. + !> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + !> order of the unitary matrix Q or P**H that is applied. + !> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + !> if nq >= k, Q = H(1) H(2) . . . H(k); + !> if nq < k, Q = H(1) H(2) . . . H(nq-1). + !> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + !> if k < nq, P = G(1) G(2) . . . G(k); + !> if k >= nq, P = G(1) G(2) . . . G(nq-1). + + pure subroutine stdlib_zunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans, vect + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: k, lda, ldc, lwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), c(ldc,*) + complex(dp), intent(in) :: tau(*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: applyq, left, lquery, notran + character :: transt + integer(ilp) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + applyq = stdlib_lsame( vect, 'Q' ) + left = stdlib_lsame( side, 'L' ) + notran = stdlib_lsame( trans, 'N' ) + lquery = ( lwork==-1 ) + ! nq is the order of q or p and nw is the minimum dimension of work + if( left ) then + nq = m + nw = max( 1, n ) + else + nq = n + nw = max( 1, m ) + end if + if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then + info = -1 + else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then + info = -2 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( k<0 ) then + info = -6 + else if( ( applyq .and. lda0 .and. n>0 ) then + if( applyq ) then + if( left ) then + nb = stdlib_ilaenv( 1, 'ZUNMQR', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'ZUNMQR', side // trans, m, n-1, n-1,-1 ) + end if + else + if( left ) then + nb = stdlib_ilaenv( 1, 'ZUNMLQ', side // trans, m-1, n, m-1,-1 ) + else + nb = stdlib_ilaenv( 1, 'ZUNMLQ', side // trans, m, n-1, n-1,-1 ) + end if + end if + lwkopt = nw*nb + else + lwkopt = 1 + end if + work( 1 ) = lwkopt + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZUNMBR', -info ) + return + else if( lquery ) then + return + end if + ! quick return if possible + if( m==0 .or. n==0 )return + if( applyq ) then + ! apply q + if( nq>=k ) then + ! q was determined by a call to stdlib_zgebrd with nq >= k + call stdlib_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & + ) + else if( nq>1 ) then + ! q was determined by a call to stdlib_zgebrd with nq < k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_zunmqr( side, trans, mi, ni, nq-1, a( 2, 1 ), lda, tau,c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + else + ! apply p + if( notran ) then + transt = 'C' + else + transt = 'N' + end if + if( nq>k ) then + ! p was determined by a call to stdlib_zgebrd with nq > k + call stdlib_zunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & + iinfo ) + else if( nq>1 ) then + ! p was determined by a call to stdlib_zgebrd with nq <= k + if( left ) then + mi = m - 1 + ni = n + i1 = 2 + i2 = 1 + else + mi = m + ni = n - 1 + i1 = 1 + i2 = 2 + end if + call stdlib_zunmlq( side, transt, mi, ni, nq-1, a( 1, 2 ), lda,tau, c( i1, i2 ), & + ldc, work, lwork, iinfo ) + end if + end if + work( 1 ) = lwkopt + return + end subroutine stdlib_zunmbr + + !> ZCGESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> ZCGESV first attempts to factorize the matrix in COMPLEX and use this + !> factorization within an iterative refinement procedure to produce a + !> solution with COMPLEX*16 normwise backward error quality (see below). + !> If the approach fails the method switches to a COMPLEX*16 + !> factorization and solve. + !> The iterative refinement is not going to be a winning strategy if + !> the ratio COMPLEX performance over COMPLEX*16 performance is too + !> small. A reasonable strategy should take the number of right-hand + !> sides and the size of the matrix into account. This might be done + !> with a call to ILAENV in the future. Up to now, we always try + !> iterative refinement. + !> The iterative refinement process is stopped if + !> ITER > ITERMAX + !> or for all the RHS we have: + !> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX + !> where + !> o ITER is the number of the current iteration in the iterative + !> refinement process + !> o RNRM is the infinity-norm of the residual + !> o XNRM is the infinity-norm of the solution + !> o ANRM is the infinity-operator-norm of the matrix A + !> o EPS is the machine epsilon returned by DLAMCH('Epsilon') + !> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 + !> respectively. + + subroutine stdlib_zcgesv( n, nrhs, a, lda, ipiv, b, ldb, x, ldx, work,swork, rwork, iter, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, iter + integer(ilp), intent(in) :: lda, ldb, ldx, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + real(dp), intent(out) :: rwork(*) + complex(sp), intent(out) :: swork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(in) :: b(ldb,*) + complex(dp), intent(out) :: work(n,*), x(ldx,*) + ! ===================================================================== + ! Parameters + logical(lk), parameter :: doitref = .true. + integer(ilp), parameter :: itermax = 30 + real(dp), parameter :: bwdmax = 1.0e+00_dp + + + + + ! Local Scalars + integer(ilp) :: i, iiter, ptsa, ptsx + real(dp) :: anrm, cte, eps, rnrm, xnrm + complex(dp) :: zdum + ! Intrinsic Functions + intrinsic :: abs,real,max,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( zdum ) = abs( real( zdum,KIND=dp) ) + abs( aimag( zdum ) ) + ! Executable Statements + info = 0 + iter = 0 + ! test the input parameters. + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( ldaxnrm*cte )go to 10 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion. we are good to exit. + iter = 0 + return + 10 continue + loop_30: do iiter = 1, itermax + ! convert r (in work) from double precision to single precision + ! and store the result in sx. + call stdlib_zlag2c( n, nrhs, work, n, swork( ptsx ), n, info ) + if( info/=0 ) then + iter = -2 + go to 40 + end if + ! solve the system sa*sx = sr. + call stdlib_cgetrs( 'NO TRANSPOSE', n, nrhs, swork( ptsa ), n, ipiv,swork( ptsx ), & + n, info ) + ! convert sx back to double precision and update the current + ! iterate. + call stdlib_clag2z( n, nrhs, swork( ptsx ), n, work, n, info ) + do i = 1, nrhs + call stdlib_zaxpy( n, cone, work( 1, i ), 1, x( 1, i ), 1 ) + end do + ! compute r = b - ax (r is work). + call stdlib_zlacpy( 'ALL', n, nrhs, b, ldb, work, n ) + call stdlib_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n, nrhs, n, cnegone,a, lda, x, & + ldx, cone, work, n ) + ! check whether the nrhs normwise backward errors satisfy the + ! stopping criterion. if yes, set iter=iiter>0 and return. + do i = 1, nrhs + xnrm = cabs1( x( stdlib_izamax( n, x( 1, i ), 1 ), i ) ) + rnrm = cabs1( work( stdlib_izamax( n, work( 1, i ), 1 ), i ) ) + if( rnrm>xnrm*cte )go to 20 + end do + ! if we are here, the nrhs normwise backward errors satisfy the + ! stopping criterion, we are good to exit. + iter = iiter + return + 20 continue + end do loop_30 + ! if we are at this place of the code, this is because we have + ! performed iter=itermax iterations and never satisfied the stopping + ! criterion, set up the iter flag accordingly and follow up on double + ! precision routine. + iter = -itermax - 1 + 40 continue + ! single-precision iterative refinement failed to converge to a + ! satisfactory solution, so we resort to double precision. + call stdlib_zgetrf( n, n, a, lda, ipiv, info ) + if( info/=0 )return + call stdlib_zlacpy( 'ALL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zgetrs( 'NO TRANSPOSE', n, nrhs, a, lda, ipiv, x, ldx,info ) + return + end subroutine stdlib_zcgesv + + !> ZGELQ: computes an LQ factorization of a complex M-by-N matrix A: + !> A = ( L 0 ) * Q + !> where: + !> Q is a N-by-N orthogonal matrix; + !> L is a lower-triangular M-by-M matrix; + !> 0 is a M-by-(N-M) zero matrix, if M < N. + + pure subroutine stdlib_zgelq( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks, lwmin, lwopt, lwreq + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'ZGELQ ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'ZGELQ ', ' ', m, n, 2, -1 ) + else + mb = 1 + nb = n + end if + if( mb>min( m, n ) .or. mb<1 ) mb = 1 + if( nb>n .or. nb<=m ) nb = n + mintsz = m + 5 + if ( nb>m .and. n>m ) then + if( mod( n - m, nb - m )==0 ) then + nblcks = ( n - m ) / ( nb - m ) + else + nblcks = ( n - m ) / ( nb - m ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + if( ( n<=m ) .or. ( nb<=m ) .or. ( nb>=n ) ) then + lwmin = max( 1, n ) + lwopt = max( 1, mb*n ) + else + lwmin = max( 1, m ) + lwopt = max( 1, mb*m ) + end if + lminws = .false. + if( ( tsize=lwmin ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=n ) ) then + lwreq = max( 1, mb*n ) + else + lwreq = max( 1, mb*m ) + end if + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n ) ) then + call stdlib_zgelqt( m, n, mb, a, lda, t( 6 ), mb, work, info ) + else + call stdlib_zlaswlq( m, n, mb, nb, a, lda, t( 6 ), mb, work,lwork, info ) + end if + work( 1 ) = lwreq + return + end subroutine stdlib_zgelq + + !> ZGELSD: computes the minimum-norm solution to a real linear least + !> squares problem: + !> minimize 2-norm(| b - A*x |) + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The problem is solved in three steps: + !> (1) Reduce the coefficient matrix A to bidiagonal form with + !> Householder transformations, reducing the original problem + !> into a "bidiagonal least squares problem" (BLS) + !> (2) Solve the BLS using a divide and conquer approach. + !> (3) Apply back all the Householder transformations to solve + !> the original least squares problem. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + iwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), s(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & + maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz + real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum + ! Intrinsic Functions + intrinsic :: int,log,max,min,real + ! Executable Statements + ! test the input arguments. + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + smlsiz = stdlib_ilaenv( 9, 'ZGELSD', ' ', 0, 0, 0, 0 ) + mnthr = stdlib_ilaenv( 6, 'ZGELSD', ' ', m, n, nrhs, -1 ) + nlvl = max( int( log( real( minmn,KIND=dp) / real( smlsiz + 1,KIND=dp) ) /log( & + two ),KIND=ilp) + 1, 0 ) + liwork = 3*minmn*nlvl + 11*minmn + mm = m + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns. + mm = n + maxwrk = max( maxwrk, n*stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n,-1, -1 ) ) + + maxwrk = max( maxwrk, nrhs*stdlib_ilaenv( 1, 'ZUNMQR', 'LC', m,nrhs, n, -1 ) ) + + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined. + lrwork = 10*n + 2*n*smlsiz + 8*n*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + maxwrk = max( maxwrk, 2*n + ( mm + n )*stdlib_ilaenv( 1,'ZGEBRD', ' ', mm, n, & + -1, -1 ) ) + maxwrk = max( maxwrk, 2*n + nrhs*stdlib_ilaenv( 1, 'ZUNMBR','QLC', mm, nrhs, & + n, -1 ) ) + maxwrk = max( maxwrk, 2*n + ( n - 1 )*stdlib_ilaenv( 1,'ZUNMBR', 'PLN', n, & + nrhs, n, -1 ) ) + maxwrk = max( maxwrk, 2*n + n*nrhs ) + minwrk = max( 2*n + mm, 2*n + n*nrhs ) + end if + if( n>m ) then + lrwork = 10*m + 2*m*smlsiz + 8*m*nlvl + 3*smlsiz*nrhs +max( (smlsiz+1)**2, n*(& + 1+nrhs) + 2*nrhs ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows. + maxwrk = m + m*stdlib_ilaenv( 1, 'ZGELQF', ' ', m, n, -1,-1 ) + maxwrk = max( maxwrk, m*m + 4*m + 2*m*stdlib_ilaenv( 1,'ZGEBRD', ' ', m, m,& + -1, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + nrhs*stdlib_ilaenv( 1,'ZUNMBR', 'QLC', m,& + nrhs, m, -1 ) ) + maxwrk = max( maxwrk, m*m + 4*m + ( m - 1 )*stdlib_ilaenv( 1,'ZUNMLQ', & + 'LC', n, nrhs, m, -1 ) ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m*m + 4*m + m*nrhs ) + ! xxx: ensure the path 2a case below is triggered. the workspace + ! calculation should use queries for all routines eventually. + maxwrk = max( maxwrk,4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) + else + ! path 2 - underdetermined. + maxwrk = 2*m + ( n + m )*stdlib_ilaenv( 1, 'ZGEBRD', ' ', m,n, -1, -1 ) + + maxwrk = max( maxwrk, 2*m + nrhs*stdlib_ilaenv( 1, 'ZUNMBR','QLC', m, nrhs,& + m, -1 ) ) + maxwrk = max( maxwrk, 2*m + m*stdlib_ilaenv( 1, 'ZUNMBR','PLN', n, nrhs, m,& + -1 ) ) + maxwrk = max( maxwrk, 2*m + m*nrhs ) + end if + minwrk = max( 2*m + n, 2*m + m*nrhs ) + end if + end if + minwrk = min( minwrk, maxwrk ) + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_dlaset( 'F', minmn, 1, zero, zero, s, 1 ) + rank = 0 + go to 10 + end if + ! scale b if max entry outside range [smlnum,bignum]. + bnrm = stdlib_zlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum. + call stdlib_zlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! if m < n make sure b(m+1:n,:) = 0 + if( m=n ) then + ! path 1 - overdetermined or exactly determined. + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + nwork = itau + n + ! compute a=q*r. + ! (rworkspace: need n) + ! (cworkspace: need n, prefer n*nb) + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + info ) + ! multiply b by transpose(q). + ! (rworkspace: need n) + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + nwork ), lwork-nwork+1, info ) + ! zero out below r. + if( n>1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + end if + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ie = 1 + nrwork = ie + n + ! bidiagonalize r in a. + ! (rworkspace: need n) + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + call stdlib_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r. + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + call stdlib_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_zlalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of r. + call stdlib_zunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + else if( n>=mnthr .and. lwork>=4*m+m*m+max( m, 2*m-4, nrhs, n-3*m ) ) then + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm. + ldwork = m + if( lwork>=max( 4*m+m*lda+max( m, 2*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & + lda + itau = 1 + nwork = m + 1 + ! compute a=l*q. + ! (cworkspace: need 2*m, prefer m+m*nb) + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) + + il = nwork + ! copy l to work(il), zeroing out above its diagonal. + call stdlib_zlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + itauq = il + ldwork*m + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize l in work(il). + ! (rworkspace: need m) + ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) + call stdlib_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( nwork ),lwork-nwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l. + ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_zlalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of l. + call stdlib_zunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & + ldb, work( nwork ),lwork-nwork+1, info ) + ! zero out below first m rows of b. + call stdlib_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + nwork = itau + m + ! multiply transpose(q) by b. + ! (cworkspace: need nrhs, prefer nrhs*nb) + call stdlib_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& + , lwork-nwork+1, info ) + else + ! path 2 - remaining underdetermined cases. + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ie = 1 + nrwork = ie + m + ! bidiagonalize a. + ! (rworkspace: need m) + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + nwork ), lwork-nwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors. + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + ! solve the bidiagonal least squares problem. + call stdlib_zlalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & + nwork ), rwork( nrwork ),iwork, info ) + if( info/=0 ) then + go to 10 + end if + ! multiply b by right bidiagonalizing vectors of a. + call stdlib_zunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & + nwork ), lwork-nwork+1, info ) + end if + ! undo scaling. + if( iascl==1 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_zlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_zlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 10 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwork + rwork( 1 ) = lrwork + return + end subroutine stdlib_zgelsd + + !> ZGELSS: computes the minimum norm solution to a complex linear + !> least squares problem: + !> Minimize 2-norm(| b - A*x |). + !> using the singular value decomposition (SVD) of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix + !> X. + !> The effective rank of A is determined by treating as zero those + !> singular values which are less than RCOND times the largest singular + !> value. + + subroutine stdlib_zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + real(dp), intent(out) :: rwork(*), s(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & + ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr + integer(ilp) :: lwork_zgeqrf, lwork_zunmqr, lwork_zgebrd, lwork_zunmbr, lwork_zungbr, & + lwork_zunmlq, lwork_zgelqf + real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr + ! Local Arrays + complex(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + maxmn = max( m, n ) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda0 ) then + mm = m + mnthr = stdlib_ilaenv( 6, 'ZGELSS', ' ', m, n, nrhs, -1 ) + if( m>=n .and. m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than + ! columns + ! compute space needed for stdlib_zgeqrf + call stdlib_zgeqrf( m, n, a, lda, dum(1), dum(1), -1, info ) + lwork_zgeqrf = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zunmqr + call stdlib_zunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1), b,ldb, dum(1), -1, & + info ) + lwork_zunmqr = real( dum(1),KIND=dp) + mm = n + maxwrk = max( maxwrk, n + n*stdlib_ilaenv( 1, 'ZGEQRF', ' ', m,n, -1, -1 ) ) + + maxwrk = max( maxwrk, n + nrhs*stdlib_ilaenv( 1, 'ZUNMQR', 'LC',m, nrhs, n, -& + 1 ) ) + end if + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + ! compute space needed for stdlib_zgebrd + call stdlib_zgebrd( mm, n, a, lda, s, s, dum(1), dum(1), dum(1),-1, info ) + + lwork_zgebrd = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zunmbr + call stdlib_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1),b, ldb, dum(1),& + -1, info ) + lwork_zunmbr = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zungbr + call stdlib_zungbr( 'P', n, n, n, a, lda, dum(1),dum(1), -1, info ) + lwork_zungbr = real( dum(1),KIND=dp) + ! compute total workspace needed + maxwrk = max( maxwrk, 2*n + lwork_zgebrd ) + maxwrk = max( maxwrk, 2*n + lwork_zunmbr ) + maxwrk = max( maxwrk, 2*n + lwork_zungbr ) + maxwrk = max( maxwrk, n*nrhs ) + minwrk = 2*n + max( nrhs, m ) + end if + if( n>m ) then + minwrk = 2*m + max( nrhs, n ) + if( n>=mnthr ) then + ! path 2a - underdetermined, with many more columns + ! than rows + ! compute space needed for stdlib_zgelqf + call stdlib_zgelqf( m, n, a, lda, dum(1), dum(1),-1, info ) + lwork_zgelqf = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zgebrd + call stdlib_zgebrd( m, m, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_zgebrd = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zunmbr + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_zunmbr = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zungbr + call stdlib_zungbr( 'P', m, m, m, a, lda, dum(1),dum(1), -1, info ) + lwork_zungbr = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zunmlq + call stdlib_zunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1),b, ldb, dum(1), -& + 1, info ) + lwork_zunmlq = real( dum(1),KIND=dp) + ! compute total workspace needed + maxwrk = m + lwork_zgelqf + maxwrk = max( maxwrk, 3*m + m*m + lwork_zgebrd ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_zunmbr ) + maxwrk = max( maxwrk, 3*m + m*m + lwork_zungbr ) + if( nrhs>1 ) then + maxwrk = max( maxwrk, m*m + m + m*nrhs ) + else + maxwrk = max( maxwrk, m*m + 2*m ) + end if + maxwrk = max( maxwrk, m + lwork_zunmlq ) + else + ! path 2 - underdetermined + ! compute space needed for stdlib_zgebrd + call stdlib_zgebrd( m, n, a, lda, s, s, dum(1), dum(1),dum(1), -1, info ) + + lwork_zgebrd = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zunmbr + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1), b, ldb, dum(& + 1), -1, info ) + lwork_zunmbr = real( dum(1),KIND=dp) + ! compute space needed for stdlib_zungbr + call stdlib_zungbr( 'P', m, n, m, a, lda, dum(1),dum(1), -1, info ) + lwork_zungbr = real( dum(1),KIND=dp) + maxwrk = 2*m + lwork_zgebrd + maxwrk = max( maxwrk, 2*m + lwork_zunmbr ) + maxwrk = max( maxwrk, 2*m + lwork_zungbr ) + maxwrk = max( maxwrk, n*nrhs ) + end if + end if + maxwrk = max( minwrk, maxwrk ) + end if + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + call stdlib_dlaset( 'F', minmn, 1, zero, zero, s, minmn ) + rank = 0 + go to 70 + end if + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! overdetermined case + if( m>=n ) then + ! path 1 - overdetermined or exactly determined + mm = m + if( m>=mnthr ) then + ! path 1a - overdetermined, with many more rows than columns + mm = n + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + info ) + ! multiply b by transpose(q) + ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) + ! (rworkspace: none) + call stdlib_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & + iwork ), lwork-iwork+1, info ) + ! zero out below r + if( n>1 )call stdlib_zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors of r + ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) + ! (rworkspace: none) + call stdlib_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + n + ! perform bidiagonal qr iteration + ! multiply b by transpose of left singular vectors + ! compute right singular vectors in a + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_zdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_zlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_zgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_zgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_zlacpy( 'G', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_zgemv( 'C', n, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_zcopy( n, work, 1, b, 1 ) + end if + else if( n>=mnthr .and. lwork>=3*m+m*m+max( m, nrhs, n-2*m ) )then + ! underdetermined case, m much less than n + ! path 2a - underdetermined, with many more columns than rows + ! and sufficient workspace for an efficient algorithm + ldwork = m + if( lwork>=3*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda + itau = 1 + iwork = m + 1 + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: none) + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) + + il = iwork + ! copy l to work(il), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( il ), ldwork ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) + ie = 1 + itauq = il + ldwork*m + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(il) + ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & + itaup ), work( iwork ),lwork-iwork+1, info ) + ! multiply b by transpose of left bidiagonalizing vectors of l + ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & + ldb, work( iwork ),lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors of r in work(il) + ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) + ! (rworkspace: none) + call stdlib_zungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & + lwork-iwork+1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right singular + ! vectors of l in work(il) and multiplying b by transpose of + ! left singular vectors + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, 0, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & + b, ldb, rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_zdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_zlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + iwork = il + m*ldwork + ! multiply b by right singular vectors of l in work(il) + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1 ) then + call stdlib_zgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & + work( iwork ), ldb ) + call stdlib_zlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = ( lwork-iwork+1 ) / m + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_zgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1, i ), & + ldb, czero, work( iwork ), m ) + call stdlib_zlacpy( 'G', m, bl, work( iwork ), m, b( 1, i ),ldb ) + end do + else + call stdlib_zgemv( 'C', m, m, cone, work( il ), ldwork, b( 1, 1 ),1, czero, work(& + iwork ), 1 ) + call stdlib_zcopy( m, work( iwork ), 1, b( 1, 1 ), 1 ) + end if + ! zero out below first m rows of b + call stdlib_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1 ), ldb ) + iwork = itau + m + ! multiply transpose(q) by b + ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) + ! (rworkspace: none) + call stdlib_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& + , lwork-iwork+1, info ) + else + ! path 2 - remaining underdetermined cases + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& + iwork ), lwork-iwork+1,info ) + ! multiply b by transpose of left bidiagonalizing vectors + ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) + ! (rworkspace: none) + call stdlib_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & + iwork ), lwork-iwork+1, info ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: none) + call stdlib_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& + 1, info ) + irwork = ie + m + ! perform bidiagonal qr iteration, + ! computing right singular vectors of a in a and + ! multiplying b by transpose of left singular vectors + ! (cworkspace: none) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'L', m, n, 0, nrhs, s, rwork( ie ), a, lda, dum,1, b, ldb, & + rwork( irwork ), info ) + if( info/=0 )go to 70 + ! multiply b by reciprocals of singular values + thr = max( rcond*s( 1 ), sfmin ) + if( rcondthr ) then + call stdlib_zdrscl( nrhs, s( i ), b( i, 1 ), ldb ) + rank = rank + 1 + else + call stdlib_zlaset( 'F', 1, nrhs, czero, czero, b( i, 1 ), ldb ) + end if + end do + ! multiply b by right singular vectors of a + ! (cworkspace: need n, prefer n*nrhs) + ! (rworkspace: none) + if( lwork>=ldb*nrhs .and. nrhs>1 ) then + call stdlib_zgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) + + call stdlib_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) + else if( nrhs>1 ) then + chunk = lwork / n + do i = 1, nrhs, chunk + bl = min( nrhs-i+1, chunk ) + call stdlib_zgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1, i ),ldb, czero, & + work, n ) + call stdlib_zlacpy( 'F', n, bl, work, n, b( 1, i ), ldb ) + end do + else + call stdlib_zgemv( 'C', m, n, cone, a, lda, b, 1, czero, work, 1 ) + call stdlib_zcopy( n, work, 1, b, 1 ) + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,info ) + else if( iascl==2 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, n, nrhs, b, ldb, info ) + call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,info ) + end if + if( ibscl==1 ) then + call stdlib_zlascl( 'G', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info ) + else if( ibscl==2 ) then + call stdlib_zlascl( 'G', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info ) + end if + 70 continue + work( 1 ) = maxwrk + return + end subroutine stdlib_zgelss + + !> ZGELSY: computes the minimum-norm solution to a complex linear least + !> squares problem: + !> minimize || A * X - B || + !> using a complete orthogonal factorization of A. A is an M-by-N + !> matrix which may be rank-deficient. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + !> The routine first computes a QR factorization with column pivoting: + !> A * P = Q * [ R11 R12 ] + !> [ 0 R22 ] + !> with R11 defined as the largest leading submatrix whose estimated + !> condition number is less than 1/RCOND. The order of R11, RANK, + !> is the effective rank of A. + !> Then, R22 is considered to be negligible, and R12 is annihilated + !> by unitary transformations from the right, arriving at the + !> complete orthogonal factorization: + !> A * P = Q * [ T11 0 ] * Z + !> [ 0 0 ] + !> The minimum-norm solution is then + !> X = P * Z**H [ inv(T11)*Q1**H*B ] + !> [ 0 ] + !> where Q1 consists of the first RANK columns of Q. + !> This routine is basically identical to the original xGELSX except + !> three differences: + !> o The permutation of matrix B (the right hand side) is faster and + !> more simple. + !> o The call to the subroutine xGEQPF has been substituted by the + !> the call to the subroutine xGEQP3. This subroutine is a Blas-3 + !> version of the QR factorization with column pivoting. + !> o Matrix B (the right hand side) is updated with Blas-3. + + subroutine stdlib_zgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info, rank + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + real(dp), intent(in) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: jpvt(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: imax = 1 + integer(ilp), parameter :: imin = 2 + + + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & + nb4 + real(dp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize + complex(dp) :: c1, c2, s1, s2 + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,max,min + ! Executable Statements + mn = min( m, n ) + ismin = mn + 1 + ismax = 2*mn + 1 + ! test the input arguments. + info = 0 + nb1 = stdlib_ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 ) + nb2 = stdlib_ilaenv( 1, 'ZGERQF', ' ', m, n, -1, -1 ) + nb3 = stdlib_ilaenv( 1, 'ZUNMQR', ' ', m, n, nrhs, -1 ) + nb4 = stdlib_ilaenv( 1, 'ZUNMRQ', ' ', m, n, nrhs, -1 ) + nb = max( nb1, nb2, nb3, nb4 ) + lwkopt = max( 1, mn+2*n+nb*( n+1 ), 2*mn+nb*nrhs ) + work( 1 ) = cmplx( lwkopt,KIND=dp) + lquery = ( lwork==-1 ) + if( m<0 ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + rank = 0 + go to 70 + end if + bnrm = stdlib_zlange( 'M', m, nrhs, b, ldb, rwork ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info ) + ibscl = 2 + end if + ! compute qr factorization with column pivoting of a: + ! a * p = q * r + call stdlib_zgeqp3( m, n, a, lda, jpvt, work( 1 ), work( mn+1 ),lwork-mn, rwork, info ) + + wsize = mn + real( work( mn+1 ),KIND=dp) + ! complex workspace: mn+nb*(n+1). real workspace 2*n. + ! details of householder rotations stored in work(1:mn). + ! determine rank using incremental condition estimation + work( ismin ) = cone + work( ismax ) = cone + smax = abs( a( 1, 1 ) ) + smin = smax + if( abs( a( 1, 1 ) )==zero ) then + rank = 0 + call stdlib_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) + go to 70 + else + rank = 1 + end if + 10 continue + if( rank ZGEMLQ: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'C': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by short wide + !> LQ factorization (ZGELQ) + + pure subroutine stdlib_zgemlq( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), t(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * mb + mn = m + else + lw = m * mb + mn = n + end if + if( ( nb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, nb - k ) == 0 ) then + nblcks = ( mn - k ) / ( nb - k ) + else + nblcks = ( mn - k ) / ( nb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_zgemlqt( side, trans, m, n, k, mb, a, lda,t( 6 ), mb, c, ldc, work, info & + ) + else + call stdlib_zlamswlq( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),mb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_zgemlq + + !> ZGEMQR: overwrites the general real M-by-N matrix C with + !> SIDE = 'L' SIDE = 'R' + !> TRANS = 'N': Q * C C * Q + !> TRANS = 'T': Q**H * C C * Q**H + !> where Q is a complex unitary matrix defined as the product + !> of blocked elementary reflectors computed by tall skinny + !> QR factorization (ZGEQR) + + pure subroutine stdlib_zgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, & + info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: side, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, k, tsize, lwork, ldc + ! Array Arguments + complex(dp), intent(in) :: a(lda,*), t(*) + complex(dp), intent(inout) :: c(ldc,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: left, right, tran, notran, lquery + integer(ilp) :: mb, nb, lw, nblcks, mn + ! Intrinsic Functions + intrinsic :: int,max,min,mod + ! Executable Statements + ! test the input arguments + lquery = lwork==-1 + notran = stdlib_lsame( trans, 'N' ) + tran = stdlib_lsame( trans, 'C' ) + left = stdlib_lsame( side, 'L' ) + right = stdlib_lsame( side, 'R' ) + mb = int( t( 2 ),KIND=ilp) + nb = int( t( 3 ),KIND=ilp) + if( left ) then + lw = n * nb + mn = m + else + lw = mb * nb + mn = n + end if + if( ( mb>k ) .and. ( mn>k ) ) then + if( mod( mn - k, mb - k )==0 ) then + nblcks = ( mn - k ) / ( mb - k ) + else + nblcks = ( mn - k ) / ( mb - k ) + 1 + end if + else + nblcks = 1 + end if + info = 0 + if( .not.left .and. .not.right ) then + info = -1 + else if( .not.tran .and. .not.notran ) then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( k<0 .or. k>mn ) then + info = -5 + else if( lda=max( m, n, & + k ) ) ) then + call stdlib_zgemqrt( side, trans, m, n, k, nb, a, lda, t( 6 ),nb, c, ldc, work, info & + ) + else + call stdlib_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6 ),nb, c, ldc, work, & + lwork, info ) + end if + work( 1 ) = lw + return + end subroutine stdlib_zgemqr + + !> ZGEQR: computes a QR factorization of a complex M-by-N matrix A: + !> A = Q * ( R ), + !> ( 0 ) + !> where: + !> Q is a M-by-M orthogonal matrix; + !> R is an upper-triangular N-by-N matrix; + !> 0 is a (M-N)-by-N zero matrix, if M > N. + + pure subroutine stdlib_zgeqr( m, n, a, lda, t, tsize, work, lwork,info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. -- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, m, n, tsize, lwork + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(*), work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, lminws, mint, minw + integer(ilp) :: mb, nb, mintsz, nblcks + ! Intrinsic Functions + intrinsic :: max,min,mod + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( tsize==-1 .or. tsize==-2 .or.lwork==-1 .or. lwork==-2 ) + mint = .false. + minw = .false. + if( tsize==-2 .or. lwork==-2 ) then + if( tsize/=-1 ) mint = .true. + if( lwork/=-1 ) minw = .true. + end if + ! determine the block size + if( min ( m, n )>0 ) then + mb = stdlib_ilaenv( 1, 'ZGEQR ', ' ', m, n, 1, -1 ) + nb = stdlib_ilaenv( 1, 'ZGEQR ', ' ', m, n, 2, -1 ) + else + mb = m + nb = 1 + end if + if( mb>m .or. mb<=n ) mb = m + if( nb>min( m, n ) .or. nb<1 ) nb = 1 + mintsz = n + 5 + if( mb>n .and. m>n ) then + if( mod( m - n, mb - n )==0 ) then + nblcks = ( m - n ) / ( mb - n ) + else + nblcks = ( m - n ) / ( mb - n ) + 1 + end if + else + nblcks = 1 + end if + ! determine if the workspace size satisfies minimal size + lminws = .false. + if( ( tsize=n ) .and. ( & + tsize>=mintsz ).and. ( .not.lquery ) ) then + if( tsize=m ) ) then + call stdlib_zgeqrt( m, n, nb, a, lda, t( 6 ), nb, work, info ) + else + call stdlib_zlatsqr( m, n, mb, nb, a, lda, t( 6 ), nb, work,lwork, info ) + end if + work( 1 ) = max( 1, nb*n ) + return + end subroutine stdlib_zgeqr + + !> ZGESDD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors, by using divide-and-conquer method. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns VT = V**H, not V. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), s(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs + integer(ilp) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & + iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& + nwork, wrkbl + integer(ilp) :: lwork_zgebrd_mn, lwork_zgebrd_mm, lwork_zgebrd_nn, lwork_zgelqf_mn, & + lwork_zgeqrf_mn, lwork_zungbr_p_mn, lwork_zungbr_p_nn, lwork_zungbr_q_mn, & + lwork_zungbr_q_mm, lwork_zunglq_mn, lwork_zunglq_nn, lwork_zungqr_mm, lwork_zungqr_mn, & + lwork_zunmbr_prc_mm, lwork_zunmbr_qln_mm, lwork_zunmbr_prc_mn, lwork_zunmbr_qln_mn, & + lwork_zunmbr_prc_nn, lwork_zunmbr_qln_nn + real(dp) :: anrm, bignum, eps, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dum(1) + complex(dp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: int,max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + mnthr1 = int( minmn*17.0_dp / 9.0_dp,KIND=ilp) + mnthr2 = int( minmn*5.0_dp / 3.0_dp,KIND=ilp) + wntqa = stdlib_lsame( jobz, 'A' ) + wntqs = stdlib_lsame( jobz, 'S' ) + wntqas = wntqa .or. wntqs + wntqo = stdlib_lsame( jobz, 'O' ) + wntqn = stdlib_lsame( jobz, 'N' ) + lquery = ( lwork==-1 ) + minwrk = 1 + maxwrk = 1 + if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( lda=n .and. ldvt=n .and. minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_dbdsdc,KIND=dp) is + ! bdspac = 3*n*n + 4*n for singular values and vectors; + ! bdspac = 4*n for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_zgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_zgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_zgebrd( n, n, cdum(1), n, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_zgebrd_nn = int( cdum(1),KIND=ilp) + call stdlib_zgeqrf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_zgeqrf_mn = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'P', n, n, n, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'Q', m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_q_mn = int( cdum(1),KIND=ilp) + call stdlib_zungqr( m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungqr_mm = int( cdum(1),KIND=ilp) + call stdlib_zungqr( m, n, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungqr_mn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_zunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_zunmbr_qln_mm = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'Q', 'L', 'N', m, n, n, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_zunmbr_qln_mn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'Q', 'L', 'N', n, n, n, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_zunmbr_qln_nn = int( cdum(1),KIND=ilp) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + maxwrk = n + lwork_zgeqrf_mn + maxwrk = max( maxwrk, 2*n + lwork_zgebrd_nn ) + minwrk = 3*n + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + wrkbl = n + lwork_zgeqrf_mn + wrkbl = max( wrkbl, n + lwork_zungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn ) + maxwrk = m*n + n*n + wrkbl + minwrk = 2*n*n + 3*n + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + wrkbl = n + lwork_zgeqrf_mn + wrkbl = max( wrkbl, n + lwork_zungqr_mn ) + wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + 3*n + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + wrkbl = n + lwork_zgeqrf_mn + wrkbl = max( wrkbl, n + lwork_zungqr_mm ) + wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn ) + wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn ) + maxwrk = n*n + wrkbl + minwrk = n*n + max( 3*n, n + m ) + end if + else if( m>=mnthr2 ) then + ! path 5 (m >> n, but not as much as mnthr1) + maxwrk = 2*n + lwork_zgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 5o (m >> n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mn ) + else if( wntqa ) then + ! path 5a (m >> n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn ) + maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mm ) + end if + else + ! path 6 (m >= n, but not much larger) + maxwrk = 2*n + lwork_zgebrd_mn + minwrk = 2*n + m + if( wntqo ) then + ! path 6o (m >= n, jobz='o') + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn ) + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + n*n + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mn ) + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn ) + else if( wntqa ) then + ! path 6a (m >= n, jobz='a') + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn ) + end if + end if + else if( minmn>0 ) then + ! there is no complex work space needed for bidiagonal svd + ! the realwork space needed for bidiagonal svd (stdlib_dbdsdc,KIND=dp) is + ! bdspac = 3*m*m + 4*m for singular values and vectors; + ! bdspac = 4*m for singular values only; + ! not including e, ru, and rvt matrices. + ! compute space preferred for each routine + call stdlib_zgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_zgebrd_mn = int( cdum(1),KIND=ilp) + call stdlib_zgebrd( m, m, cdum(1), m, dum(1), dum(1), cdum(1),cdum(1), cdum(1), -& + 1, ierr ) + lwork_zgebrd_mm = int( cdum(1),KIND=ilp) + call stdlib_zgelqf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr ) + lwork_zgelqf_mn = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'P', m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_p_mn = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'P', n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_p_nn = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zungbr_q_mm = int( cdum(1),KIND=ilp) + call stdlib_zunglq( m, n, m, cdum(1), m, cdum(1), cdum(1),-1, ierr ) + lwork_zunglq_mn = int( cdum(1),KIND=ilp) + call stdlib_zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1),-1, ierr ) + lwork_zunglq_nn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'P', 'R', 'C', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_zunmbr_prc_mm = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'P', 'R', 'C', m, n, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_zunmbr_prc_mn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, m, cdum(1), n, cdum(1),cdum(1), n, cdum(& + 1), -1, ierr ) + lwork_zunmbr_prc_nn = int( cdum(1),KIND=ilp) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, m, cdum(1), m, cdum(1),cdum(1), m, cdum(& + 1), -1, ierr ) + lwork_zunmbr_qln_mm = int( cdum(1),KIND=ilp) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + maxwrk = m + lwork_zgelqf_mn + maxwrk = max( maxwrk, 2*m + lwork_zgebrd_mm ) + minwrk = 3*m + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + wrkbl = m + lwork_zgelqf_mn + wrkbl = max( wrkbl, m + lwork_zunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm ) + maxwrk = m*n + m*m + wrkbl + minwrk = 2*m*m + 3*m + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + wrkbl = m + lwork_zgelqf_mn + wrkbl = max( wrkbl, m + lwork_zunglq_mn ) + wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + 3*m + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + wrkbl = m + lwork_zgelqf_mn + wrkbl = max( wrkbl, m + lwork_zunglq_nn ) + wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm ) + wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm ) + maxwrk = m*m + wrkbl + minwrk = m*m + max( 3*m, m + n ) + end if + else if( n>=mnthr2 ) then + ! path 5t (n >> m, but not as much as mnthr1) + maxwrk = 2*m + lwork_zgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 5to (n >> m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_mn ) + else if( wntqa ) then + ! path 5ta (n >> m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_nn ) + end if + else + ! path 6t (n > m, but not much larger) + maxwrk = 2*m + lwork_zgebrd_mn + minwrk = 2*m + n + if( wntqo ) then + ! path 6to (n > m, jobz='o') + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_mn ) + maxwrk = maxwrk + m*n + minwrk = minwrk + m*m + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_mn ) + else if( wntqa ) then + ! path 6ta (n > m, jobz='a') + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm ) + maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_nn ) + end if + end if + end if + maxwrk = max( maxwrk, minwrk ) + end if + if( info==0 ) then + work( 1 ) = stdlib_droundup_lwork( maxwrk ) + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr1 ) then + if( wntqn ) then + ! path 1 (m >> n, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n [tau] + n [work] + ! cworkspace: prefer n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out below r + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + n + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2 (m >> n, jobz='o') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + ir = iu + ldwrku*n + if( lwork >= m*n + n*n + 3*n ) then + ! work(ir) is m by n + ldwrkr = m + else + ldwrkr = ( lwork - n*n - 3*n ) / n + end if + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work( ir ), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of r in work(iru) and computing right singular vectors + ! of r in work(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by the right singular vectors of r + ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in work(ir) and copying to a + ! cworkspace: need n*n [u] + n*n [r] + ! cworkspace: prefer n*n [u] + m*n [r] + ! rworkspace: need 0 + do i = 1, m, ldwrkr + chunk = min( m-i+1, ldwrkr ) + call stdlib_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( iu ), & + ldwrku, czero,work( ir ), ldwrkr ) + call stdlib_zlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 3 (m >> n, jobz='s') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + ir = 1 + ! work(ir) is n by n + ldwrkr = n + itau = ir + ldwrkr*n + nwork = itau + n + ! compute a=q*r + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) + ! generate q in a + ! cworkspace: need n*n [r] + n [tau] + n [work] + ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in work(ir) + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! cworkspace: need n*n [r] + ! rworkspace: need 0 + call stdlib_zlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & + u, ldu ) + else if( wntqa ) then + ! path 4 (m >> n, jobz='a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + iu = 1 + ! work(iu) is n by n + ldwrku = n + itau = iu + ldwrku*n + nwork = itau + n + ! compute a=q*r, copying result to u + ! cworkspace: need n*n [u] + n [tau] + n [work] + ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! cworkspace: need n*n [u] + n [tau] + m [work] + ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ! produce r in a, zeroing out below it + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize r in a + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + iru = ie + n + irvt = iru + n*n + nrwork = irvt + n*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_zunmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & + ldwrku,work( nwork ), lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of r + ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] + ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! cworkspace: need n*n [u] + ! rworkspace: need 0 + call stdlib_zgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & + a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else if( m>=mnthr2 ) then + ! mnthr2 <= m < mnthr1 + ! path 5 (m >> n, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_zungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5n (m >> n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1,dum,1,dum, idum, & + rwork( nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + ! path 5o (m >> n, jobz='o') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work(iu) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, + ! storing the result in work(iu), copying to vt + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_zlarcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & + rwork( nrwork ) ) + call stdlib_zlacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) + ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_zlacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),n, work( iu ), & + ldwrku, rwork( nrwork ) ) + call stdlib_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else if( wntqs ) then + ! path 5s (m >> n, jobz='s') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! path 5a (m >> n, jobz='a') + ! copy a to vt, generate p**h + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! copy a to u, generate q + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] + call stdlib_zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', n, n, a, lda, vt, ldvt ) + ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + call stdlib_zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + end if + else + ! m < mnthr2 + ! path 6 (m >= n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ! use stdlib_zunmbr to compute singular vectors + ie = 1 + nrwork = ie + n + itauq = 1 + itaup = itauq + n + nwork = itaup + n + ! bidiagonalize a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need n [e] + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6n (m >= n, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need n [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + iu = nwork + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + if( lwork >= m*n + 3*n ) then + ! work( iu ) is m by n + ldwrku = m + else + ! work( iu ) is ldwrku by n + ldwrku = ( lwork - 3*n ) / n + end if + nwork = iu + ldwrku*n + ! path 6o (m >= n, jobz='o') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*n ) then + ! path 6o-fast + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) + ! overwrite work(iu) by left singular vectors of a, copying + ! to a + ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + call stdlib_zlaset( 'F', m, n, czero, czero, work( iu ),ldwrku ) + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & + ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_zlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) + else + ! path 6o-slow + ! generate q in a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*n [tauq, taup] + n*n [u] + ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] + ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here + nrwork = irvt + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_zlacrm( chunk, n, a( i, 1 ), lda,rwork( iru ), n, work( iu )& + , ldwrku,rwork( nrwork ) ) + call stdlib_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6s (m >= n, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_zlaset( 'F', m, n, czero, czero, u, ldu ) + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6a (m >= n, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac + iru = nrwork + irvt = iru + n*n + nrwork = irvt + n*n + call stdlib_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& + , n, dum, idum,rwork( nrwork ), iwork, info ) + ! set the right corner of u to identity matrix + call stdlib_zlaset( 'F', m, m, czero, czero, u, ldu ) + if( m>n ) then + call stdlib_zlaset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) + end if + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + m [work] + ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*n [tauq, taup] + n [work] + ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] + ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + call stdlib_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr1 ) then + if( wntqn ) then + ! path 1t (n >> m, jobz='n') + ! no singular vectors to be computed + itau = 1 + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m [tau] + m [work] + ! cworkspace: prefer m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! zero out above l + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + nrwork = ie + m + ! perform bidiagonal svd, compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_dbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 2t (n >> m, jobz='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + ivt = 1 + ldwkvt = m + ! work(ivt) is m by m + il = ivt + ldwkvt*m + if( lwork >= m*n + m*m + 3*m ) then + ! work(il) m by n + ldwrkl = m + chunk = n + else + ! work(il) is m by chunk + ldwrkl = m + chunk = ( lwork - m*m - 3*m ) / m + end if + itau = il + ldwrkl*chunk + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing about above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) + ! overwrite work(iu) by the left singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) + ! overwrite work(ivt) by the right singular vectors of l + ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(il) by q + ! in a, storing result in work(il) and copying to a + ! cworkspace: need m*m [vt] + m*m [l] + ! cworkspace: prefer m*m [vt] + m*n [l] + ! rworkspace: need 0 + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_zgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1, i ), & + lda, czero, work( il ),ldwrkl ) + call stdlib_zlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 3t (n >> m, jobz='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + il = 1 + ! work(il) is m by m + ldwrkl = m + itau = il + ldwrkl*m + nwork = itau + m + ! compute a=l*q + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + ! copy l to work(il), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) + + ! generate q in a + ! cworkspace: need m*m [l] + m [tau] + m [work] + ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& + 1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in work(il) + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & + work( itaup ), work( nwork ),lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & + u, ldu, work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by left singular vectors of l + ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & + vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) + ! copy vt to work(il), multiply right singular vectors of l + ! in work(il) by q in a, storing result in vt + ! cworkspace: need m*m [l] + ! rworkspace: need 0 + call stdlib_zlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & + vt, ldvt ) + else if( wntqa ) then + ! path 4t (n >> m, jobz='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + ivt = 1 + ! work(ivt) is m by m + ldwkvt = m + itau = ivt + ldwkvt*m + nwork = itau + m + ! compute a=l*q, copying result to vt + ! cworkspace: need m*m [vt] + m [tau] + m [work] + ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & + ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! cworkspace: need m*m [vt] + m [tau] + n [work] + ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& + nwork+1, ierr ) + ! produce l in a, zeroing out above it + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = itau + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize l in a + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( nwork ), lwork-nwork+1,ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac + iru = ie + m + irvt = iru + m*m + nrwork = irvt + m*m + call stdlib_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of l + ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] + ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + call stdlib_zunmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& + ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + ! multiply right singular vectors of l in work(ivt) by + ! q in vt, storing result in a + ! cworkspace: need m*m [vt] + ! rworkspace: need 0 + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else if( n>=mnthr2 ) then + ! mnthr2 <= n < mnthr1 + ! path 5t (n >> m, but not as much as mnthr1) + ! reduce to bidiagonal form without qr decomposition, use + ! stdlib_zungbr and matrix multiplication to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 5tn (n >> m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + ivt = nwork + ! path 5to (n >> m, jobz='o') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& + nwork+1, ierr ) + ldwkvt = m + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + nwork = ivt + ldwkvt*n + chunk = n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(irvt,KIND=dp) + ! storing the result in work(ivt), copying to u + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_zlacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & + nrwork ) ) + call stdlib_zlacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) + ! multiply rwork(irvt) by p**h in a, storing the + ! result in work(ivt), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_zlarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,work( ivt ), & + ldwkvt, rwork( nrwork ) ) + call stdlib_zlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + else if( wntqs ) then + ! path 5ts (n >> m, jobz='s') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! path 5ta (n >> m, jobz='a') + ! copy a to u, generate q + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& + nwork+1, ierr ) + ! copy a to vt, generate p**h + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need 0 + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the + ! result in a, copying to u + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] + call stdlib_zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, m, a, lda, u, ldu ) + ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, + ! storing the result in a, copying to vt + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + call stdlib_zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) + + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + end if + else + ! n < mnthr2 + ! path 6t (n > m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ! use stdlib_zunmbr to compute singular vectors + ie = 1 + nrwork = ie + m + itauq = 1 + itaup = itauq + m + nwork = itaup + m + ! bidiagonalize a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] + ! rworkspace: need m [e] + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( nwork ), lwork-nwork+1,ierr ) + if( wntqn ) then + ! path 6tn (n > m, jobz='n') + ! compute singular values only + ! cworkspace: need 0 + ! rworkspace: need m [e] + bdspac + call stdlib_dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,dum, idum, rwork(& + nrwork ), iwork, info ) + else if( wntqo ) then + ! path 6to (n > m, jobz='o') + ldwkvt = m + ivt = nwork + if( lwork >= m*n + 3*m ) then + ! work( ivt ) is m by n + call stdlib_zlaset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) + nwork = ivt + ldwkvt*n + else + ! work( ivt ) is m by chunk + chunk = ( lwork - 3*m ) / m + nwork = ivt + ldwkvt*chunk + end if + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + if( lwork >= m*n + 3*m ) then + ! path 6to-fast + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) + ! overwrite work(ivt) by right singular vectors of a, + ! copying to a + ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) + + call stdlib_zunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & + ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) + call stdlib_zlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) + else + ! path 6to-slow + ! generate p**h in a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] + ! rworkspace: need 0 + call stdlib_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & + lwork-nwork+1, ierr ) + ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the + ! result in work(iu), copying to a + ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] + ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here + nrwork = iru + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_zlarcm( m, blk, rwork( irvt ), m, a( 1, i ),lda, work( ivt )& + , ldwkvt,rwork( nrwork ) ) + call stdlib_zlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1, i ), lda ) + + end do + end if + else if( wntqs ) then + ! path 6ts (n > m, jobz='s') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_zlaset( 'F', m, n, czero, czero, vt, ldvt ) + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + else + ! path 6ta (n > m, jobz='a') + ! perform bidiagonal svd, computing left singular vectors + ! of bidiagonal matrix in rwork(iru) and computing right + ! singular vectors of bidiagonal matrix in rwork(irvt) + ! cworkspace: need 0 + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac + irvt = nrwork + iru = irvt + m*m + nrwork = iru + m*m + call stdlib_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& + , m, dum, idum,rwork( nrwork ), iwork, info ) + ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u + ! overwrite u by left singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + m [work] + ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + call stdlib_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) + call stdlib_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & + work( nwork ),lwork-nwork+1, ierr ) + ! set all of vt to identity matrix + call stdlib_zlaset( 'F', n, n, czero, cone, vt, ldvt ) + ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt + ! overwrite vt by right singular vectors of a + ! cworkspace: need 2*m [tauq, taup] + n [work] + ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] + ! rworkspace: need m [e] + m*m [rvt] + call stdlib_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) + call stdlib_zunmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & + work( nwork ),lwork-nwork+1, ierr ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm ZGESV: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> The LU decomposition with partial pivoting and row interchanges is + !> used to factor A as + !> A = P * L * U, + !> where P is a permutation matrix, L is unit lower triangular, and U is + !> upper triangular. The factored form of A is then used to solve the + !> system of equations A * X = B. + + pure subroutine stdlib_zgesv( n, nrhs, a, lda, ipiv, b, ldb, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + ! ===================================================================== + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + if( n<0 ) then + info = -1 + else if( nrhs<0 ) then + info = -2 + else if( lda ZGESVD: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, optionally computing the left and/or right singular + !> vectors. The SVD is written + !> A = U * SIGMA * conjugate-transpose(V) + !> where SIGMA is an M-by-N matrix which is zero except for its + !> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + !> V is an N-by-N unitary matrix. The diagonal elements of SIGMA + !> are the singular values of A; they are real and non-negative, and + !> are returned in descending order. The first min(m,n) columns of + !> U and V are the left and right singular vectors of A. + !> Note that the routine returns V**H, not V. + + subroutine stdlib_zgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobu, jobvt + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldvt, lwork, m, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), s(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& + wntvs + integer(ilp) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & + iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & + wrkbl + integer(ilp) :: lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m, lwork_zgebrd, & + lwork_zungbr_p, lwork_zungbr_q, lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m + real(dp) :: anrm, bignum, eps, smlnum + ! Local Arrays + real(dp) :: dum(1) + complex(dp) :: cdum(1) + ! Intrinsic Functions + intrinsic :: max,min,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + minmn = min( m, n ) + wntua = stdlib_lsame( jobu, 'A' ) + wntus = stdlib_lsame( jobu, 'S' ) + wntuas = wntua .or. wntus + wntuo = stdlib_lsame( jobu, 'O' ) + wntun = stdlib_lsame( jobu, 'N' ) + wntva = stdlib_lsame( jobvt, 'A' ) + wntvs = stdlib_lsame( jobvt, 'S' ) + wntvas = wntva .or. wntvs + wntvo = stdlib_lsame( jobvt, 'O' ) + wntvn = stdlib_lsame( jobvt, 'N' ) + lquery = ( lwork==-1 ) + if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then + info = -1 + else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & + then + info = -2 + else if( m<0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda=n .and. minmn>0 ) then + ! space needed for stdlib_zbdsqr is bdspac = 5*n + mnthr = stdlib_ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_zgeqrf + call stdlib_zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_zgeqrf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zungqr + call stdlib_zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_zungqr_n = int( cdum(1),KIND=ilp) + call stdlib_zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_zungqr_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zgebrd + call stdlib_zgebrd( n, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_zgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zungbr + call stdlib_zungbr( 'P', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_p = int( cdum(1),KIND=ilp) + call stdlib_zungbr( 'Q', n, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_q = int( cdum(1),KIND=ilp) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + maxwrk = n + lwork_zgeqrf + maxwrk = max( maxwrk, 2*n+lwork_zgebrd ) + if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2*n+lwork_zungbr_p ) + minwrk = 3*n + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or + ! 'a') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_p ) + maxwrk = max( n*n+wrkbl, n*n+m*n ) + minwrk = 2*n + m + else if( wntus .and. wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntus .and. wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' or + ! 'a') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_n ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_p ) + maxwrk = 2*n*n + wrkbl + minwrk = 2*n + m + else if( wntua .and. wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' or + ! 'a') + wrkbl = n + lwork_zgeqrf + wrkbl = max( wrkbl, n+lwork_zungqr_m ) + wrkbl = max( wrkbl, 2*n+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_q ) + wrkbl = max( wrkbl, 2*n+lwork_zungbr_p ) + maxwrk = n*n + wrkbl + minwrk = 2*n + m + end if + else + ! path 10 (m at least n, but not much larger) + call stdlib_zgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_zgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*n + lwork_zgebrd + if( wntus .or. wntuo ) then + call stdlib_zungbr( 'Q', m, n, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_zungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_zungbr_q ) + end if + if( wntua ) then + call stdlib_zungbr( 'Q', m, m, n, a, lda, cdum(1),cdum(1), -1, ierr ) + + lwork_zungbr_q = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*n+lwork_zungbr_q ) + end if + if( .not.wntvn ) then + maxwrk = max( maxwrk, 2*n+lwork_zungbr_p ) + end if + minwrk = 2*n + m + end if + else if( minmn>0 ) then + ! space needed for stdlib_zbdsqr is bdspac = 5*m + mnthr = stdlib_ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 ) + ! compute space needed for stdlib_zgelqf + call stdlib_zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_zgelqf = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zunglq + call stdlib_zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,ierr ) + lwork_zunglq_n = int( cdum(1),KIND=ilp) + call stdlib_zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr ) + lwork_zunglq_m = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zgebrd + call stdlib_zgebrd( m, m, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, ierr ) + + lwork_zgebrd = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zungbr p + call stdlib_zungbr( 'P', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_p = int( cdum(1),KIND=ilp) + ! compute space needed for stdlib_zungbr q + call stdlib_zungbr( 'Q', m, m, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_q = int( cdum(1),KIND=ilp) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + maxwrk = m + lwork_zgelqf + maxwrk = max( maxwrk, 2*m+lwork_zgebrd ) + if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2*m+lwork_zungbr_q ) + minwrk = 3*m + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', + ! jobvt='o') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_q ) + maxwrk = max( m*m+wrkbl, m*m+m*n ) + minwrk = 2*m + n + else if( wntvs .and. wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntvs .and. wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_m ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_q ) + maxwrk = 2*m*m + wrkbl + minwrk = 2*m + n + else if( wntva .and. wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + wrkbl = m + lwork_zgelqf + wrkbl = max( wrkbl, m+lwork_zunglq_n ) + wrkbl = max( wrkbl, 2*m+lwork_zgebrd ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_p ) + wrkbl = max( wrkbl, 2*m+lwork_zungbr_q ) + maxwrk = m*m + wrkbl + minwrk = 2*m + n + end if + else + ! path 10t(n greater than m, but not much larger) + call stdlib_zgebrd( m, n, a, lda, s, dum(1), cdum(1),cdum(1), cdum(1), -1, & + ierr ) + lwork_zgebrd = int( cdum(1),KIND=ilp) + maxwrk = 2*m + lwork_zgebrd + if( wntvs .or. wntvo ) then + ! compute space needed for stdlib_zungbr p + call stdlib_zungbr( 'P', m, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_zungbr_p ) + end if + if( wntva ) then + call stdlib_zungbr( 'P', n, n, m, a, n, cdum(1),cdum(1), -1, ierr ) + lwork_zungbr_p = int( cdum(1),KIND=ilp) + maxwrk = max( maxwrk, 2*m+lwork_zungbr_p ) + end if + if( .not.wntun ) then + maxwrk = max( maxwrk, 2*m+lwork_zungbr_q ) + end if + minwrk = 2*m + n + end if + end if + maxwrk = max( maxwrk, minwrk ) + work( 1 ) = maxwrk + if( lworkzero .and. anrmbignum ) then + iscl = 1 + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr ) + end if + if( m>=n ) then + ! a has at least as many rows as columns. if a has sufficiently + ! more rows than columns, first reduce using the qr + ! decomposition (if sufficient workspace available) + if( m>=mnthr ) then + if( wntun ) then + ! path 1 (m much larger than n, jobu='n') + ! no left singular vectors to be computed + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: need 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out below r + if( n > 1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),lda ) + end if + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + ncvt = 0 + if( wntvo .or. wntvas ) then + ! if right singular vectors desired, generate p'. + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + ncvt = n + end if + irwork = ie + n + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + ! if right singular vectors desired in vt, copy them there + if( wntvas )call stdlib_zlacpy( 'F', n, n, a, lda, vt, ldvt ) + else if( wntuo .and. wntvn ) then + ! path 2 (m much larger than n, jobu='o', jobvt='n') + ! n left singular vectors to be overwritten on a and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n, work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n, work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n, work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to work(ir) and zero out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: need 0) + call stdlib_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: n) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a + ! (cworkspace: need 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntuo .and. wntvas ) then + ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') + ! n left singular vectors to be overwritten on a and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+n*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ldwrkr = n + else + ! work(iu) is ldwrku by n and work(ir) is n by n + ldwrku = ( lwork-n*n ) / n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_zlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt, copying result to work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) and computing right + ! singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( ir ), & + ldwrkr, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in work(iu) and copying to a + ! (cworkspace: need n*n+n, prefer n*n+m*n) + ! (rworkspace: 0) + do i = 1, m, ldwrku + chunk = min( m-i+1, ldwrku ) + call stdlib_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),lda, work( ir & + ), ldwrkr, czero,work( iu ), ldwrku ) + call stdlib_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1 ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_zlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), ldvt ) + + ! generate q in a + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: n) + call stdlib_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in a by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& + work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, cdum,& + 1, rwork( irwork ),info ) + end if + else if( wntus ) then + if( wntvn ) then + ! path 4 (m much larger than n, jobu='s', jobvt='n') + ! n left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(ir), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(ir), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 5 (m much larger than n, jobu='s', jobvt='o') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + ! copy right singular vectors of r to a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left vectors bidiagonalizing r + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing r in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 6 (m much larger than n, jobu='s', jobvt='s' + ! or 'a') + ! n left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+3*n ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ! generate q in a + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in a by left singular vectors of r in + ! work(iu), storing result in u + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & + czero, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to vt, zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_zlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntua ) then + if( wntvn ) then + ! path 7 (m much larger than n, jobu='a', jobvt='n') + ! m left singular vectors to be computed in u and + ! no right singular vectors to be computed + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(ir) is lda by n + ldwrkr = lda + else + ! work(ir) is n by n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! copy r to work(ir), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) + + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(ir) + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,1, work( ir ),& + ldwrkr, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(ir), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,1, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvo ) then + ! path 8 (m much larger than n, jobu='a', jobvt='o') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be overwritten on a + if( lwork>=2*n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*n ) then + ! work(iu) is lda by n and work(ir) is lda by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = lda + else if( lwork>=wrkbl+( lda+n )*n ) then + ! work(iu) is lda by n and work(ir) is n by n + ldwrku = lda + ir = iu + ldwrku*n + ldwrkr = n + else + ! work(iu) is n by n and work(ir) is n by n + ldwrku = n + ir = iu + ldwrku*n + ldwrkr = n + end if + itau = ir + ldwrkr*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*n*n+3*n, + ! prefer 2*n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*n*n+3*n-1, + ! prefer 2*n*n+2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in work(ir) + ! (cworkspace: need 2*n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),work( ir ), ldwrkr, & + work( iu ),ldwrku, cdum, 1, rwork( irwork ),info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + ! copy right singular vectors of r from work(ir) to a + call stdlib_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! zero out below r in a + if( n > 1 ) then + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,a( 2, 1 ), lda ) + + end if + ! bidiagonalize r in a + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in a + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,lda, u, ldu, & + cdum, 1, rwork( irwork ),info ) + end if + else if( wntvas ) then + ! path 9 (m much larger than n, jobu='a', jobvt='s' + ! or 'a') + ! m left singular vectors to be computed in u and + ! n right singular vectors to be computed in vt + if( lwork>=n*n+max( n+m, 3*n ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*n ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is n by n + ldwrku = n + end if + itau = iu + ldwrku*n + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r to work(iu), zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) + + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in work(iu), copying result to vt + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) + ! generate left bidiagonalizing vectors in work(iu) + ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need n*n+3*n-1, + ! prefer n*n+2*n+(n-1)*nb) + ! (rworkspace: need 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of r in work(iu) and computing + ! right singular vectors of r in vt + ! (cworkspace: need n*n) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,ldvt, work( iu )& + , ldwrku, cdum, 1,rwork( irwork ), info ) + ! multiply q in u by left singular vectors of r in + ! work(iu), storing result in a + ! (cworkspace: need n*n) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & + czero, a, lda ) + ! copy left singular vectors of a from a to u + call stdlib_zlacpy( 'F', m, n, a, lda, u, ldu ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + n + ! compute a=q*r, copying result to u + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: 0) + call stdlib_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + ! generate q in u + ! (cworkspace: need n+m, prefer n+m*nb) + ! (rworkspace: 0) + call stdlib_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy r from a to vt, zeroing out below it + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + if( n>1 )call stdlib_zlaset( 'L', n-1, n-1, czero, czero,vt( 2, 1 ), & + ldvt ) + ie = 1 + itauq = itau + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize r in vt + ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply q in u by left bidiagonalizing vectors + ! in vt + ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & + ldu, work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& + lwork-iwork+1, ierr ) + irwork = ie + n + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! m < mnthr + ! path 10 (m at least n, but not much larger) + ! reduce to bidiagonal form without qr decomposition + ie = 1 + itauq = 1 + itaup = itauq + n + iwork = itaup + n + ! bidiagonalize a + ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) + ! (rworkspace: need n) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) + ! (rworkspace: 0) + call stdlib_zlacpy( 'L', m, n, a, lda, u, ldu ) + if( wntus )ncu = n + if( wntua )ncu = m + call stdlib_zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zlacpy( 'U', n, n, a, lda, vt, ldvt ) + call stdlib_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n, prefer 2*n+n*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + n + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + else + ! a has more columns than rows. if a has sufficiently more + ! columns than rows, first reduce using the lq decomposition (if + ! sufficient workspace available) + if( n>=mnthr ) then + if( wntvn ) then + ! path 1t(n much larger than m, jobvt='n') + ! no right singular vectors to be computed + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & + ierr ) + ! zero out above l + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),lda ) + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& + work( iwork ), lwork-iwork+1,ierr ) + if( wntuo .or. wntuas ) then + ! if left singular vectors desired, generate q + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + irwork = ie + m + nru = 0 + if( wntuo .or. wntuas )nru = m + ! perform bidiagonal qr iteration, computing left singular + ! vectors of a in a if desired + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,a, lda, cdum, & + 1, rwork( irwork ), info ) + ! if left singular vectors desired in u, copy them there + if( wntuas )call stdlib_zlacpy( 'F', m, m, a, lda, u, ldu ) + else if( wntvo .and. wntun ) then + ! path 2t(n much larger than m, jobu='n', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to work(ir) and zero out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) + + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& + work( itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,cdum, 1, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvo .and. wntuas ) then + ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') + ! m right singular vectors to be overwritten on a and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=max( wrkbl, lda*n )+lda*m ) then + ! work(iu) is lda by n and work(ir) is lda by m + ldwrku = lda + chunk = n + ldwrkr = lda + else if( lwork>=max( wrkbl, lda*n )+m*m ) then + ! work(iu) is lda by n and work(ir) is m by m + ldwrku = lda + chunk = n + ldwrkr = m + else + ! work(iu) is m by chunk and work(ir) is m by m + ldwrku = m + chunk = ( lwork-m*m ) / m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing about above it + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u, copying result to work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) + ! generate right vectors bidiagonalizing l in work(ir) + ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & + iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u, and computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( ir ), ldwrkr, u, & + ldu, cdum, 1,rwork( irwork ), info ) + iu = itauq + ! multiply right singular vectors of l in work(ir) by q + ! in a, storing result in work(iu) and copying to a + ! (cworkspace: need m*m+m, prefer m*m+m*n)) + ! (rworkspace: 0) + do i = 1, n, chunk + blk = min( n-i+1, chunk ) + call stdlib_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1, & + i ), lda, czero,work( iu ), ldwrku ) + call stdlib_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1, i ), lda ) + + end do + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& + 1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),ldu ) + ! generate q in a + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in a + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left vectors bidiagonalizing l in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,u, ldu, cdum, & + 1, rwork( irwork ), info ) + end if + else if( wntvs ) then + if( wntun ) then + ! path 4t(n much larger than m, jobu='n', jobvt='s') + ! m right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(ir), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right vectors bidiagonalizing l in + ! work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy result to vt + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 5t(n much larger than m, jobu='o', jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out below it + call stdlib_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + ! copy left singular vectors of l to a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right vectors bidiagonalizing l by q in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors of l in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 6t(n much larger than m, jobu='s' or 'a', + ! jobvt='s') + ! m right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+3*m ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by n + ldwrku = lda + else + ! work(iu) is lda by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ! generate q in a + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in a, storing result in vt + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & + czero, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + else if( wntva ) then + if( wntun ) then + ! path 7t(n much larger than m, jobu='n', jobvt='a') + ! n right singular vectors to be computed in vt and + ! no left singular vectors to be computed + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + ir = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(ir) is lda by m + ldwrkr = lda + else + ! work(ir) is m by m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! copy l to work(ir), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & + ldwrkr ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(ir) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + ! generate right bidiagonalizing vectors in work(ir) + ! (cworkspace: need m*m+3*m-1, + ! prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of l in work(ir) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),work( ir ), ldwrkr, & + cdum, 1, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(ir) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,ldvt, cdum, 1, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuo ) then + ! path 8t(n much larger than m, jobu='o', jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be overwritten on a + if( lwork>=2*m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+2*lda*m ) then + ! work(iu) is lda by m and work(ir) is lda by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = lda + else if( lwork>=wrkbl+( lda+m )*m ) then + ! work(iu) is lda by m and work(ir) is m by m + ldwrku = lda + ir = iu + ldwrku*m + ldwrkr = m + else + ! work(iu) is m by m and work(ir) is m by m + ldwrku = m + ir = iu + ldwrku*m + ldwrkr = m + end if + itau = ir + ldwrkr*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to + ! work(ir) + ! (cworkspace: need 2*m*m+3*m, + ! prefer 2*m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) + + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need 2*m*m+3*m-1, + ! prefer 2*m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in work(ir) + ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & + work( iwork ),lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in work(ir) and computing + ! right singular vectors of l in work(iu) + ! (cworkspace: need 2*m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + work( ir ),ldwrkr, cdum, 1, rwork( irwork ),info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + ! copy left singular vectors of a from work(ir) to a + call stdlib_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! zero out above l in a + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,a( 1, 2 ), lda ) + ! bidiagonalize l in a + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in a by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in a and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1,rwork( irwork ), info ) + end if + else if( wntuas ) then + ! path 9t(n much larger than m, jobu='s' or 'a', + ! jobvt='a') + ! n right singular vectors to be computed in vt and + ! m left singular vectors to be computed in u + if( lwork>=m*m+max( n+m, 3*m ) ) then + ! sufficient workspace for a fast algorithm + iu = 1 + if( lwork>=wrkbl+lda*m ) then + ! work(iu) is lda by m + ldwrku = lda + else + ! work(iu) is m by m + ldwrku = m + end if + itau = iu + ldwrku*m + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to work(iu), zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & + ldwrku ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in work(iu), copying result to u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & + itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) + call stdlib_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) + ! generate right bidiagonalizing vectors in work(iu) + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & + work( iwork ),lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of l in u and computing right + ! singular vectors of l in work(iu) + ! (cworkspace: need m*m) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),work( iu ), ldwrku, & + u, ldu, cdum, 1,rwork( irwork ), info ) + ! multiply right singular vectors of l in work(iu) by + ! q in vt, storing result in a + ! (cworkspace: need m*m) + ! (rworkspace: 0) + call stdlib_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& + czero, a, lda ) + ! copy right singular vectors of a from a to vt + call stdlib_zlacpy( 'F', m, n, a, lda, vt, ldvt ) + else + ! insufficient workspace for a fast algorithm + itau = 1 + iwork = itau + m + ! compute a=l*q, copying result to vt + ! (cworkspace: need 2*m, prefer m+m*nb) + ! (rworkspace: 0) + call stdlib_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& + iwork+1, ierr ) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + ! generate q in vt + ! (cworkspace: need m+n, prefer m+n*nb) + ! (rworkspace: 0) + call stdlib_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & + lwork-iwork+1, ierr ) + ! copy l to u, zeroing out above it + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zlaset( 'U', m-1, m-1, czero, czero,u( 1, 2 ), ldu ) + ie = 1 + itauq = itau + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize l in u + ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) + ! (rworkspace: need m) + call stdlib_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & + itaup ),work( iwork ), lwork-iwork+1, ierr ) + ! multiply right bidiagonalizing vectors in u by q + ! in vt + ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) + ! (rworkspace: 0) + call stdlib_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & + ldvt,work( iwork ), lwork-iwork+1, ierr ) + ! generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & + lwork-iwork+1, ierr ) + irwork = ie + m + ! perform bidiagonal qr iteration, computing left + ! singular vectors of a in u and computing right + ! singular vectors of a in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1,rwork( irwork ), info ) + end if + end if + end if + else + ! n < mnthr + ! path 10t(n greater than m, but not much larger) + ! reduce to bidiagonal form without lq decomposition + ie = 1 + itauq = 1 + itaup = itauq + m + iwork = itaup + m + ! bidiagonalize a + ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) + ! (rworkspace: m) + call stdlib_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & + work( iwork ), lwork-iwork+1,ierr ) + if( wntuas ) then + ! if left singular vectors desired in u, copy result to u + ! and generate left bidiagonalizing vectors in u + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zlacpy( 'L', m, m, a, lda, u, ldu ) + call stdlib_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvas ) then + ! if right singular vectors desired in vt, copy result to + ! vt and generate right bidiagonalizing vectors in vt + ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) + ! (rworkspace: 0) + call stdlib_zlacpy( 'U', m, n, a, lda, vt, ldvt ) + if( wntva )nrvt = n + if( wntvs )nrvt = m + call stdlib_zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & + lwork-iwork+1, ierr ) + end if + if( wntuo ) then + ! if left singular vectors desired in a, generate left + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + if( wntvo ) then + ! if right singular vectors desired in a, generate right + ! bidiagonalizing vectors in a + ! (cworkspace: need 3*m, prefer 2*m+m*nb) + ! (rworkspace: 0) + call stdlib_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& + iwork+1, ierr ) + end if + irwork = ie + m + if( wntuas .or. wntuo )nru = m + if( wntun )nru = 0 + if( wntvas .or. wntvo )ncvt = n + if( wntvn )ncvt = 0 + if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, u, ldu, & + cdum, 1, rwork( irwork ),info ) + else if( ( .not.wntuo ) .and. wntvo ) then + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in u and computing right singular + ! vectors in a + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,lda, u, ldu, cdum,& + 1, rwork( irwork ),info ) + else + ! perform bidiagonal qr iteration, if desired, computing + ! left singular vectors in a and computing right singular + ! vectors in vt + ! (cworkspace: 0) + ! (rworkspace: need bdspac) + call stdlib_zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,ldvt, a, lda, & + cdum, 1, rwork( irwork ),info ) + end if + end if + end if + ! undo scaling if necessary + if( iscl==1 ) then + if( anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,& + ierr ) + if( info/=0 .and. anrm>bignum )call stdlib_dlascl( 'G', 0, 0, bignum, anrm, minmn-1,& + 1,rwork( ie ), minmn, ierr ) + if( anrm ZCGESVDQ computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + subroutine stdlib_zgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & + numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) + ! Scalar Arguments + character, intent(in) :: joba, jobp, jobr, jobu, jobv + integer(ilp), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork + integer(ilp), intent(out) :: numrank, info + integer(ilp), intent(inout) :: lcwork + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) + real(dp), intent(out) :: s(*), rwork(*) + integer(ilp), intent(out) :: iwork(*) + ! ===================================================================== + + + ! Local Scalars + integer(ilp) :: ierr, nr, n1, optratio, p, q + integer(ilp) :: lwcon, lwqp3, lwrk_zgelqf, lwrk_zgesvd, lwrk_zgesvd2, lwrk_zgeqp3, & + lwrk_zgeqrf, lwrk_zunmlq, lwrk_zunmqr, lwrk_zunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & + lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk + logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& + rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr + real(dp) :: big, epsln, rtmp, sconda, sfmin + complex(dp) :: ctmp + ! Local Arrays + complex(dp) :: cdummy(1) + real(dp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,conjg,max,min,real,sqrt + ! Executable Statements + ! test the input arguments + wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) + wntur = stdlib_lsame( jobu, 'R' ) + wntua = stdlib_lsame( jobu, 'A' ) + wntuf = stdlib_lsame( jobu, 'F' ) + lsvc0 = wntus .or. wntur .or. wntua + lsvec = lsvc0 .or. wntuf + dntwu = stdlib_lsame( jobu, 'N' ) + wntvr = stdlib_lsame( jobv, 'R' ) + wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) + rsvec = wntvr .or. wntva + dntwv = stdlib_lsame( jobv, 'N' ) + accla = stdlib_lsame( joba, 'A' ) + acclm = stdlib_lsame( joba, 'M' ) + conda = stdlib_lsame( joba, 'E' ) + acclh = stdlib_lsame( joba, 'H' ) .or. conda + rowprm = stdlib_lsame( jobp, 'P' ) + rtrans = stdlib_lsame( jobr, 'T' ) + if ( rowprm ) then + iminwrk = max( 1, n + m - 1 ) + rminwrk = max( 2, m, 5*n ) + else + iminwrk = max( 1, n ) + rminwrk = max( 2, 5*n ) + end if + lquery = (liwork == -1 .or. lcwork == -1 .or. lrwork == -1) + info = 0 + if ( .not. ( accla .or. acclm .or. acclh ) ) then + info = -1 + else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = -2 + else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then + info = -3 + else if ( .not.( lsvec .or. dntwu ) ) then + info = -4 + else if ( wntur .and. wntva ) then + info = -5 + else if ( .not.( rsvec .or. dntwv )) then + info = -5 + else if ( m<0 ) then + info = -6 + else if ( ( n<0 ) .or. ( n>m ) ) then + info = -7 + else if ( lda big / sqrt(real(m,KIND=dp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_zlascl('G',0,0,sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + call stdlib_zlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 ) + end if + ! .. at this stage, preemptive scaling is done only to avoid column + ! norms overflows during the qr factorization. the svd procedure should + ! have its own scaling to save the singular values from overflows and + ! underflows. that depends on the svd procedure. + if ( .not.rowprm ) then + rtmp = stdlib_zlange( 'M', m, n, a, lda, rwork ) + if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then + info = -8 + call stdlib_xerbla( 'ZGESVDQ', -info ) + return + end if + if ( rtmp > big / sqrt(real(m,KIND=dp)) ) then + ! .. to prevent overflow in the qr factorization, scale the + ! matrix by 1/sqrt(m) if too large entry detected + call stdlib_zlascl('G',0,0, sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) + + ascaled = .true. + end if + end if + ! Qr Factorization With Column Pivoting + ! a * p = q * [ r ] + ! [ 0 ] + do p = 1, n + ! All Columns Are Free Columns + iwork(p) = 0 + end do + call stdlib_zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) + + ! if the user requested accuracy level allows truncation in the + ! computed upper triangular factor, the matrix r is examined and, + ! if possible, replaced with its leading upper trapezoidal part. + epsln = stdlib_dlamch('E') + sfmin = stdlib_dlamch('S') + ! small = sfmin / epsln + nr = n + if ( accla ) then + ! standard absolute error bound suffices. all sigma_i with + ! sigma_i < n*eps*||a||_f are flushed to zero. this is an + ! aggressive enforcement of lower numerical rank by introducing a + ! backward error of the order of n*eps*||a||_f. + nr = 1 + rtmp = sqrt(real(n,KIND=dp))*epsln + do p = 2, n + if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 + nr = nr + 1 + end do + 3002 continue + elseif ( acclm ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r is used as the criterion for being + ! close-to-rank-deficient. the threshold is set to epsln=stdlib_dlamch('e'). + ! [[this can be made more flexible by replacing this hard-coded value + ! with a user specified threshold.]] also, the values that underflow + ! will be truncated. + nr = 1 + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go & + to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! Rrqr Not Authorized To Determine Numerical Rank Except In The + ! obvious case of zero pivots. + ! .. inspect r for exact zeros on the diagonal; + ! r(i,i)=0 => r(i:n,i:n)=0. + nr = 1 + do p = 2, n + if ( abs(a(p,p)) == zero ) go to 3502 + nr = nr + 1 + end do + 3502 continue + if ( conda ) then + ! estimate the scaled condition number of a. use the fact that it is + ! the same as the scaled condition number of r. + ! V Is Used As Workspace + call stdlib_zlacpy( 'U', n, n, a, lda, v, ldv ) + ! only the leading nr x nr submatrix of the triangular factor + ! is considered. only if nr=n will this give a reliable error + ! bound. however, even for nr < n, this can be used on an + ! expert level and obtain useful information in the sense of + ! perturbation theory. + do p = 1, nr + rtmp = stdlib_dznrm2( p, v(1,p), 1 ) + call stdlib_zdscal( p, one/rtmp, v(1,p), 1 ) + end do + if ( .not. ( lsvec .or. rsvec ) ) then + call stdlib_zpocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) + + else + call stdlib_zpocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) + + end if + sconda = one / sqrt(rtmp) + ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + ! see the reference [1] for more details. + end if + endif + if ( wntur ) then + n1 = nr + else if ( wntus .or. wntuf) then + n1 = n + else if ( wntua ) then + n1 = m + end if + if ( .not. ( rsvec .or. lsvec ) ) then + ! ....................................................................... + ! Only The Singular Values Are Requested + ! ....................................................................... + if ( rtrans ) then + ! .. compute the singular values of r**h = [a](1:nr,1:n)**h + ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and + ! the upper triangle of [a] to zero. + do p = 1, min( n, nr ) + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + a(q,p) = conjg(a(p,q)) + if ( q <= nr ) a(p,q) = czero + end do + end do + call stdlib_zgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + else + ! .. compute the singular values of r = [a](1:nr,1:n) + if ( nr > 1 )call stdlib_zlaset( 'L', nr-1,nr-1, czero,czero, a(2,1), lda ) + + call stdlib_zgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & + rwork, info ) + end if + else if ( lsvec .and. ( .not. rsvec) ) then + ! ....................................................................... + ! The Singular Values And The Left Singular Vectors Requested + ! ......................................................................."""""""" + if ( rtrans ) then + ! .. apply stdlib_zgesvd to r**h + ! .. copy r**h into [u] and overwrite [u] with the right singular + ! vectors of r + do p = 1, nr + do q = p, n + u(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_zlaset( 'U', nr-1,nr-1, czero,czero, u(1,2), ldu ) + + ! .. the left singular vectors not computed, the nr right singular + ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these + ! will be pre-multiplied by q to build the left singular vectors of a. + call stdlib_zgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + else + ! Apply Stdlib_Zgesvd To R + ! .. copy r into [u] and overwrite [u] with the left singular vectors + call stdlib_zlacpy( 'U', nr, n, a, lda, u, ldu ) + if ( nr > 1 )call stdlib_zlaset( 'L', nr-1, nr-1, czero, czero, u(2,1), ldu ) + + ! .. the right singular vectors not computed, the nr left singular + ! vectors overwrite [u](1:nr,1:nr) + call stdlib_zgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of + ! r. these will be pre-multiplied by q to build the left singular + ! vectors of a. + end if + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. ( .not.wntuf ) ) then + call stdlib_zlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_zlaset( 'A',nr,n1-nr,czero,czero,u(1,nr+1), ldu ) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not.wntuf )call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + else if ( rsvec .and. ( .not. lsvec ) ) then + ! ....................................................................... + ! The Singular Values And The Right Singular Vectors Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_zgesvd to r**h + ! .. copy r**h into v and overwrite v with the left singular vectors + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite v, the right singular + ! vectors not computed + if ( wntvr .or. ( nr == n ) ) then + call stdlib_zgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr + 1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_zlapmt( .false., nr, n, v, ldv, iwork ) + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:n,1:nr) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the qr factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_zlaset('G', n, n-nr, czero, czero, v(1,nr+1), ldv) + call stdlib_zgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + end if + else + ! Aply Stdlib_Zgesvd To R + ! Copy R Into V And Overwrite V With The Right Singular Vectors + call stdlib_zlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_zlaset( 'L', nr-1, nr-1, czero, czero, v(2,1), ldv ) + + ! .. the right singular vectors overwrite v, the nr left singular + ! vectors stored in u(1:nr,1:nr) + if ( wntvr .or. ( nr == n ) ) then + call stdlib_zgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_zlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + else + ! .. need all n right singular vectors and nr < n + ! [!] this is simple implementation that augments [v](1:nr,1:n) + ! by padding a zero block. in the case nr << n, a more efficient + ! way is to first use the lq factorization. for more details + ! how to implement this, see the " full svd " branch. + call stdlib_zlaset('G', n-nr, n, czero,czero, v(nr+1,1), ldv) + call stdlib_zgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + end if + ! .. now [v] contains the adjoint of the matrix of the right singular + ! vectors of a. + end if + else + ! ....................................................................... + ! Full Svd Requested + ! ....................................................................... + if ( rtrans ) then + ! .. apply stdlib_zgesvd to r**h [[this option is left for r + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + + ! .. the left singular vectors of r**h overwrite [v], the nr right + ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate + ! transposed + call stdlib_zgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + ! Assemble V + do p = 1, nr + v(p,p) = conjg(v(p,p)) + do q = p + 1, nr + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + if ( nr < n ) then + do p = 1, nr + do q = nr+1, n + v(p,q) = conjg(v(q,p)) + end do + end do + end if + call stdlib_zlapmt( .false., nr, n, v, ldv, iwork ) + do p = 1, nr + u(p,p) = conjg(u(p,p)) + do q = p + 1, nr + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! .. copy r**h into [v] and overwrite [v] with the left singular + ! vectors of r**h + ! [[the optimal ratio n/nr for using qrf instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio*nr > n ) then + do p = 1, nr + do q = p, n + v(q,p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_zlaset('U',nr-1,nr-1, czero,czero, v(1,2),ldv) + + call stdlib_zlaset('A',n,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_zgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & + lcwork-n, rwork, info ) + do p = 1, n + v(p,p) = conjg(v(p,p)) + do q = p + 1, n + ctmp = conjg(v(q,p)) + v(q,p) = conjg(v(p,q)) + v(p,q) = ctmp + end do + end do + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + do p = 1, n + u(p,p) = conjg(u(p,p)) + do q = p + 1, n + ctmp = conjg(u(q,p)) + u(q,p) = conjg(u(p,q)) + u(p,q) = ctmp + end do + end do + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_zlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_zlaset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + ! .. copy r**h into [u] and overwrite [u] with the right + ! singular vectors of r + do p = 1, nr + do q = p, n + u(q,nr+p) = conjg(a(p,q)) + end do + end do + if ( nr > 1 )call stdlib_zlaset('U',nr-1,nr-1,czero,czero,u(1,nr+2),ldu) + + call stdlib_zgeqrf( n, nr, u(1,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + do p = 1, nr + do q = 1, n + v(q,p) = conjg(u(p,nr+q)) + end do + end do + call stdlib_zlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + call stdlib_zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& + 1),lcwork-n-nr,rwork, info ) + call stdlib_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_zunmqr('R','C', n, n, nr, u(1,nr+1), ldu,cwork(n+1),v,ldv,& + cwork(n+nr+1),lcwork-n-nr,ierr) + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + end if + end if + else + ! .. apply stdlib_zgesvd to r [[this is the recommended option]] + if ( wntvr .or. ( nr == n ) ) then + ! .. copy r into [v] and overwrite v with the right singular vectors + call stdlib_zlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_zlaset( 'L', nr-1,nr-1, czero,czero, v(2,1), ldv ) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_zgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_zlapmt( .false., nr, n, v, ldv, iwork ) + ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + else + ! .. need all n right singular vectors and nr < n + ! The Requested Number Of The Left Singular Vectors + ! is then n1 (n or m) + ! [[the optimal ratio n/nr for using lq instead of padding + ! with zeros. here hard coded to 2; it must be at least + ! two due to work space constraints.]] + ! optratio = stdlib_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) + ! optratio = max( optratio, 2 ) + optratio = 2 + if ( optratio * nr > n ) then + call stdlib_zlacpy( 'U', nr, n, a, lda, v, ldv ) + if ( nr > 1 )call stdlib_zlaset('L', nr-1,nr-1, czero,czero, v(2,1),ldv) + + ! .. the right singular vectors of r overwrite [v], the nr left + ! singular vectors of r stored in [u](1:nr,1:nr) + call stdlib_zlaset('A', n-nr,n, czero,czero, v(nr+1,1),ldv) + call stdlib_zgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & + lcwork-n, rwork, info ) + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + ! .. now [v] contains the adjoint of the matrix of the right + ! singular vectors of a. the leading n left singular vectors + ! are in [u](1:n,1:n) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x n1), i.e. (m x n) or (m x m). + if ( ( n < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A',m-n,n,czero,czero,u(n+1,1),ldu) + if ( n < n1 ) then + call stdlib_zlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu) + call stdlib_zlaset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) + end if + end if + else + call stdlib_zlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu ) + if ( nr > 1 )call stdlib_zlaset('L',nr-1,nr-1,czero,czero,u(nr+2,1),ldu) + + call stdlib_zgelqf( nr, n, u(nr+1,1), ldu, cwork(n+1),cwork(n+nr+1), & + lcwork-n-nr, ierr ) + call stdlib_zlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv) + if ( nr > 1 )call stdlib_zlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv) + + call stdlib_zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& + 1), lcwork-n-nr, rwork, info ) + call stdlib_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + call stdlib_zunmlq('R','N',n,n,nr,u(nr+1,1),ldu,cwork(n+1),v, ldv, cwork(n+& + nr+1),lcwork-n-nr,ierr) + call stdlib_zlapmt( .false., n, n, v, ldv, iwork ) + ! Assemble The Left Singular Vector Matrix U Of Dimensions + ! (m x nr) or (m x n) or (m x m). + if ( ( nr < m ) .and. .not.(wntuf)) then + call stdlib_zlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) + + end if + end if + end if + end if + ! .. end of the "r**h or r" branch + end if + ! the q matrix from the first qrf is built into the left singular + ! vectors matrix u. + if ( .not. wntuf )call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & + cwork(n+1), lcwork-n, ierr ) + if ( rowprm .and. .not.wntuf )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -& + 1 ) + ! ... end of the "full svd" branch + end if + ! check whether some singular values are returned as zeros, e.g. + ! due to underflow, and update the numerical rank. + p = nr + do q = p, 1, -1 + if ( s(q) > zero ) go to 4002 + nr = nr - 1 + end do + 4002 continue + ! .. if numerical rank deficiency is detected, the truncated + ! singular values are set to zero. + if ( nr < n ) call stdlib_dlaset( 'G', n-nr,1, zero,zero, s(nr+1), n ) + ! .. undo scaling; this may cause overflow in the largest singular + ! values. + if ( ascaled )call stdlib_dlascl( 'G',0,0, one,sqrt(real(m,KIND=dp)), nr,1, s, n, ierr & + ) + if ( conda ) rwork(1) = sconda + rwork(2) = p - nr + ! .. p-nr is the number of singular values that are computed as + ! exact zeros in stdlib_zgesvd() applied to the (possibly truncated) + ! full row rank triangular (trapezoidal) factor of a. + numrank = nr + return + end subroutine stdlib_zgesvdq + + !> ZGESVX: uses the LU factorization to compute the solution to a complex + !> system of linear equations + !> A * X = B, + !> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + !> Error bounds on the solution and a condition estimate are also + !> provided. + + subroutine stdlib_zgesvx( fact, trans, n, nrhs, a, lda, af, ldaf, ipiv,equed, r, c, b, ldb, & + x, ldx, rcond, ferr, berr,work, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(inout) :: equed + character, intent(in) :: fact, trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldaf, ldb, ldx, n, nrhs + real(dp), intent(out) :: rcond + ! Array Arguments + integer(ilp), intent(inout) :: ipiv(*) + real(dp), intent(out) :: berr(*), ferr(*), rwork(*) + real(dp), intent(inout) :: c(*), r(*) + complex(dp), intent(inout) :: a(lda,*), af(ldaf,*), b(ldb,*) + complex(dp), intent(out) :: work(*), x(ldx,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: colequ, equil, nofact, notran, rowequ + character :: norm + integer(ilp) :: i, infequ, j + real(dp) :: amax, anorm, bignum, colcnd, rcmax, rcmin, rowcnd, rpvgrw, smlnum + ! Intrinsic Functions + intrinsic :: max,min + ! Executable Statements + info = 0 + nofact = stdlib_lsame( fact, 'N' ) + equil = stdlib_lsame( fact, 'E' ) + notran = stdlib_lsame( trans, 'N' ) + if( nofact .or. equil ) then + equed = 'N' + rowequ = .false. + colequ = .false. + else + rowequ = stdlib_lsame( equed, 'R' ) .or. stdlib_lsame( equed, 'B' ) + colequ = stdlib_lsame( equed, 'C' ) .or. stdlib_lsame( equed, 'B' ) + smlnum = stdlib_dlamch( 'SAFE MINIMUM' ) + bignum = one / smlnum + end if + ! test the input parameters. + if( .not.nofact .and. .not.equil .and. .not.stdlib_lsame( fact, 'F' ) )then + info = -1 + else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) .and. .not.stdlib_lsame( & + trans, 'C' ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda0 ) then + rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + rowcnd = one + end if + end if + if( colequ .and. info==0 ) then + rcmin = bignum + rcmax = zero + do j = 1, n + rcmin = min( rcmin, c( j ) ) + rcmax = max( rcmax, c( j ) ) + end do + if( rcmin<=zero ) then + info = -12 + else if( n>0 ) then + colcnd = max( rcmin, smlnum ) / min( rcmax, bignum ) + else + colcnd = one + end if + end if + if( info==0 ) then + if( ldb0 ) then + ! compute the reciprocal pivot growth factor of the + ! leading rank-deficient info columns of a. + rpvgrw = stdlib_zlantr( 'M', 'U', 'N', info, info, af, ldaf,rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_zlange( 'M', n, info, a, lda, rwork ) /rpvgrw + end if + rwork( 1 ) = rpvgrw + rcond = zero + return + end if + end if + ! compute the norm of the matrix a and the + ! reciprocal pivot growth factor rpvgrw. + if( notran ) then + norm = '1' + else + norm = 'I' + end if + anorm = stdlib_zlange( norm, n, n, a, lda, rwork ) + rpvgrw = stdlib_zlantr( 'M', 'U', 'N', n, n, af, ldaf, rwork ) + if( rpvgrw==zero ) then + rpvgrw = one + else + rpvgrw = stdlib_zlange( 'M', n, n, a, lda, rwork ) / rpvgrw + end if + ! compute the reciprocal of the condition number of a. + call stdlib_zgecon( norm, n, af, ldaf, anorm, rcond, work, rwork, info ) + ! compute the solution matrix x. + call stdlib_zlacpy( 'FULL', n, nrhs, b, ldb, x, ldx ) + call stdlib_zgetrs( trans, n, nrhs, af, ldaf, ipiv, x, ldx, info ) + ! use iterative refinement to improve the computed solution and + ! compute error bounds and backward error estimates for it. + call stdlib_zgerfs( trans, n, nrhs, a, lda, af, ldaf, ipiv, b, ldb, x,ldx, ferr, berr, & + work, rwork, info ) + ! transform the solution matrix x to a solution of the original + ! system. + if( notran ) then + if( colequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = c( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / colcnd + end do + end if + else if( rowequ ) then + do j = 1, nrhs + do i = 1, n + x( i, j ) = r( i )*x( i, j ) + end do + end do + do j = 1, nrhs + ferr( j ) = ferr( j ) / rowcnd + end do + end if + ! set info = n+1 if the matrix is singular to working precision. + if( rcond ZGETSLS: solves overdetermined or underdetermined complex linear systems + !> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ + !> factorization of A. It is assumed that A has full rank. + !> The following options are provided: + !> 1. If TRANS = 'N' and m >= n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A*X ||. + !> 2. If TRANS = 'N' and m < n: find the minimum norm solution of + !> an underdetermined system A * X = B. + !> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of + !> an undetermined system A**T * X = B. + !> 4. If TRANS = 'C' and m < n: find the least squares solution of + !> an overdetermined system, i.e., solve the least squares problem + !> minimize || B - A**T * X ||. + !> Several right hand side vectors b and solution vectors x can be + !> handled in a single call; they are stored as the columns of the + !> M-by-NRHS right hand side matrix B and the N-by-NRHS solution + !> matrix X. + + subroutine stdlib_zgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: trans + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, m, n, nrhs + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, tran + integer(ilp) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & + lw2, wsizeo, wsizem, info2 + real(dp) :: anrm, bignum, bnrm, smlnum, dum(1) + complex(dp) :: tq(5), workq(1) + ! Intrinsic Functions + intrinsic :: real,max,min,int + ! Executable Statements + ! test the input arguments. + info = 0 + maxmn = max( m, n ) + tran = stdlib_lsame( trans, 'C' ) + lquery = ( lwork==-1 .or. lwork==-2 ) + if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( nrhs<0 ) then + info = -4 + else if( lda=n ) then + call stdlib_zgeqr( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_zgeqr( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + else + call stdlib_zgelq( m, n, a, lda, tq, -1, workq, -1, info2 ) + tszo = int( tq( 1 ),KIND=ilp) + lwo = int( workq( 1 ),KIND=ilp) + call stdlib_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1, & + info2 ) + lwo = max( lwo, int( workq( 1 ),KIND=ilp) ) + call stdlib_zgelq( m, n, a, lda, tq, -2, workq, -2, info2 ) + tszm = int( tq( 1 ),KIND=ilp) + lwm = int( workq( 1 ),KIND=ilp) + call stdlib_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1, & + info2 ) + lwm = max( lwm, int( workq( 1 ),KIND=ilp) ) + wsizeo = tszo + lwo + wsizem = tszm + lwm + end if + if( ( lworkzero .and. anrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info ) + iascl = 2 + else if( anrm==zero ) then + ! matrix all zero. return zero solution. + call stdlib_zlaset( 'F', maxmn, nrhs, czero, czero, b, ldb ) + go to 50 + end if + brow = m + if ( tran ) then + brow = n + end if + bnrm = stdlib_zlange( 'M', brow, nrhs, b, ldb, dum ) + ibscl = 0 + if( bnrm>zero .and. bnrmbignum ) then + ! scale matrix norm down to bignum + call stdlib_zlascl( 'G', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,info ) + ibscl = 2 + end if + if ( m>=n ) then + ! compute qr factorization of a + call stdlib_zgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + if ( .not.tran ) then + ! least-squares problem min || a * x - b || + ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) + call stdlib_zgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& + 1 ), lw2,info ) + ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) + call stdlib_ztrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = n + else + ! overdetermined system of equations a**t * x = b + ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) + call stdlib_ztrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(n+1:m,1:nrhs) = czero + do j = 1, nrhs + do i = n + 1, m + b( i, j ) = czero + end do + end do + ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) + call stdlib_zgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + scllen = m + end if + else + ! compute lq factorization of a + call stdlib_zgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1 ), lw2, info ) + ! workspace at least m, optimally m*nb. + if( .not.tran ) then + ! underdetermined system of equations a * x = b + ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) + call stdlib_ztrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + ! b(m+1:n,1:nrhs) = 0 + do j = 1, nrhs + do i = m + 1, n + b( i, j ) = czero + end do + end do + ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) + call stdlib_zgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + scllen = n + else + ! overdetermined system min || a**t * x - b || + ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) + call stdlib_zgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & + work( 1 ), lw2,info ) + ! workspace at least nrhs, optimally nrhs*nb + ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) + call stdlib_ztrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) + if( info>0 ) then + return + end if + scllen = m + end if + end if + ! undo scaling + if( iascl==1 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,info ) + else if( iascl==2 ) then + call stdlib_zlascl( 'G', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,info ) + end if + if( ibscl==1 ) then + call stdlib_zlascl( 'G', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,info ) + else if( ibscl==2 ) then + call stdlib_zlascl( 'G', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,info ) + end if + 50 continue + work( 1 ) = real( tszo + lwo,KIND=dp) + return + end subroutine stdlib_zgetsls + + !> ZGETSQRHRT: computes a NB2-sized column blocked QR-factorization + !> of a complex M-by-N matrix A with M >= N, + !> A = Q * R. + !> The routine uses internally a NB1-sized column blocked and MB1-sized + !> row blocked TSQR-factorization and perfors the reconstruction + !> of the Householder vectors from the TSQR output. The routine also + !> converts the R_tsqr factor from the TSQR-factorization output into + !> the R factor that corresponds to the Householder QR-factorization, + !> A = Q_tsqr * R_tsqr = Q * R. + !> The output Q and R factors are stored in the same format as in ZGEQRT + !> (Q is in blocked compact WY-representation). See the documentation + !> of ZGEQRT for more details on the format. + + pure subroutine stdlib_zgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1 + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: t(ldt,*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, & + num_all_row_blocks + ! Intrinsic Functions + intrinsic :: ceiling,real,cmplx,max,min + ! Executable Statements + ! test the input arguments + info = 0 + lquery = lwork==-1 + if( m<0 ) then + info = -1 + else if( n<0 .or. m ZGGES: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> ZGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_zgges( jobvsl, jobvsr, sort, selctg, n, a, lda, b, ldb,sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr, work,lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_z) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkmin, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_zggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_zungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_zgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_zhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + ! (workspace: none needed) + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_ztgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_zggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_zggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_zlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_zlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_zlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_zlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zgges + + !> ZGGESX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the complex Schur form (S,T), + !> and, optionally, the left and/or right matrices of Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T; computes + !> a reciprocal condition number for the average of the selected + !> eigenvalues (RCONDE); and computes a reciprocal condition number for + !> the right and left deflating subspaces corresponding to the selected + !> eigenvalues (RCONDV). The leading columns of VSL and VSR then form + !> an orthonormal basis for the corresponding left and right eigenspaces + !> (deflating subspaces). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0 or for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if T is + !> upper triangular with non-negative diagonal and S is upper + !> triangular. + + subroutine stdlib_zggesx( jobvsl, jobvsr, sort, selctg, sense, n, a, lda,b, ldb, sdim, alpha,& + beta, vsl, ldvsl, vsr,ldvsr, rconde, rcondv, work, lwork, rwork,iwork, liwork, bwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, liwork, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rconde(2), rcondv(2), rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_z) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantsb, wantse, & + wantsn, wantst, wantsv + integer(ilp) :: i, icols, ierr, ihi, ijob, ijobvl, ijobvr, ileft, ilo, iright, irows, & + irwrk, itau, iwrk, liwmin, lwrk, maxwrk, minwrk + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pl, pr, smlnum + ! Local Arrays + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 .or. liwork==-1 ) + if( wantsn ) then + ijob = 0 + else if( wantse ) then + ijob = 1 + else if( wantsv ) then + ijob = 2 + else if( wantsb ) then + ijob = 4 + end if + ! test the input arguments + info = 0 + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -5 + else if( n<0 ) then + info = -6 + else if( lda0) then + minwrk = 2*n + maxwrk = n*(1 + stdlib_ilaenv( 1, 'ZGEQRF', ' ', n, 1, n, 0 ) ) + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'ZUNMQR', ' ', n, 1, n, -1 ) ) ) + + if( ilvsl ) then + maxwrk = max( maxwrk, n*( 1 +stdlib_ilaenv( 1, 'ZUNGQR', ' ', n, 1, n, -1 ) ) & + ) + end if + lwrk = maxwrk + if( ijob>=1 )lwrk = max( lwrk, n*n/2 ) + else + minwrk = 1 + maxwrk = 1 + lwrk = 1 + end if + work( 1 ) = lwrk + if( wantsn .or. n==0 ) then + liwmin = 1 + else + liwmin = n + 2 + end if + iwork( 1 ) = liwmin + if( lworkzero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_zggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + ! (complex workspace: need n, prefer n*nb) + if( ilvsl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_zungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + call stdlib_zgghrd( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + call stdlib_zhgeqz( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 40 + end if + ! sort eigenvalues alpha/beta and compute the reciprocal of + ! condition number(s) + if( wantst ) then + ! undo scaling on eigenvalues before selctging + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + ! reorder eigenvalues, transform generalized schur vectors, and + ! compute reciprocal condition numbers + ! (complex workspace: if ijob >= 1, need max(1, 2*sdim*(n-sdim)) + ! otherwise, need 1 ) + call stdlib_ztgsen( ijob, ilvsl, ilvsr, bwork, n, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pl, pr,dif, work( iwrk ), lwork-iwrk+1, iwork, liwork,ierr & + ) + if( ijob>=1 )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( ierr==-21 ) then + ! not enough complex workspace + info = -21 + else + if( ijob==1 .or. ijob==4 ) then + rconde( 1 ) = pl + rconde( 2 ) = pr + end if + if( ijob==2 .or. ijob==4 ) then + rcondv( 1 ) = dif( 1 ) + rcondv( 2 ) = dif( 2 ) + end if + if( ierr==1 )info = n + 3 + end if + end if + ! apply permutation to vsl and vsr + ! (workspace: none needed) + if( ilvsl )call stdlib_zggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_zggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_zlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_zlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_zlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_zlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 40 continue + work( 1 ) = maxwrk + iwork( 1 ) = liwmin + return + end subroutine stdlib_zggesx + + !> ZGGEV: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_zggev( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkmin, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(dp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ! (real workspace: need 6*n) + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_zggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + ! (complex workspace: need n, prefer n*nb) + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + ! (complex workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_zungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_zgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_zgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_zhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + ! (real workspace: need 2*n) + ! (complex workspace: need 2*n) + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ztgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_zggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZGGEVX: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B) the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> Optionally, it also computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for + !> the eigenvalues (RCONDE), and reciprocal condition numbers for the + !> right eigenvectors (RCONDV). + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j) . + !> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) + !> of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B. + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_zggevx( balanc, jobvl, jobvr, sense, n, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, ilo, ihi,lscale, rscale, abnrm, bbnrm, rconde, rcondv,work, lwork, rwork, & + iwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + real(dp), intent(out) :: abnrm, bbnrm + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: lscale(*), rconde(*), rcondv(*), rscale(*), rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery, noscl, wantsb, wantse, wantsn, & + wantsv + character :: chtemp + integer(ilp) :: i, icols, ierr, ijobvl, ijobvr, in, irows, itau, iwrk, iwrk1, j, jc, & + jr, m, maxwrk, minwrk + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(dp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + noscl = stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'P' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( .not.( noscl .or. stdlib_lsame( balanc,'S' ) .or.stdlib_lsame( balanc, 'B' ) ) ) & + then + info = -1 + else if( ijobvl<=0 ) then + info = -2 + else if( ijobvr<=0 ) then + info = -3 + else if( .not.( wantsn .or. wantse .or. wantsb .or. wantsv ) )then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute and/or balance the matrix pair (a,b) + ! (real workspace: need 6*n if balanc = 's' or 'b', 1 otherwise) + call stdlib_zggbal( balanc, n, a, lda, b, ldb, ilo, ihi, lscale, rscale,rwork, ierr ) + + ! compute abnrm and bbnrm + abnrm = stdlib_zlange( '1', n, n, a, lda, rwork( 1 ) ) + if( ilascl ) then + rwork( 1 ) = abnrm + call stdlib_dlascl( 'G', 0, 0, anrmto, anrm, 1, 1, rwork( 1 ), 1,ierr ) + abnrm = rwork( 1 ) + end if + bbnrm = stdlib_zlange( '1', n, n, b, ldb, rwork( 1 ) ) + if( ilbscl ) then + rwork( 1 ) = bbnrm + call stdlib_dlascl( 'G', 0, 0, bnrmto, bnrm, 1, 1, rwork( 1 ), 1,ierr ) + bbnrm = rwork( 1 ) + end if + ! reduce b to triangular form (qr decomposition of b) + ! (complex workspace: need n, prefer n*nb ) + irows = ihi + 1 - ilo + if( ilv .or. .not.wantsn ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the unitary transformation to a + ! (complex workspace: need n, prefer n*nb) + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl and/or vr + ! (workspace: need n, prefer n*nb) + if( ilvl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_zungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + if( ilvr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + ! (workspace: none needed) + if( ilv .or. .not.wantsn ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_zgghrd( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + ierr ) + else + call stdlib_zgghrd( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur forms and schur vectors) + ! (complex workspace: need n) + ! (real workspace: need n) + iwrk = itau + if( ilv .or. .not.wantsn ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_zhgeqz( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 90 + end if + ! compute eigenvectors and estimate condition numbers if desired + ! stdlib_ztgevc: (complex workspace: need 2*n ) + ! (real workspace: need 2*n ) + ! stdlib_ztgsna: (complex workspace: need 2*n*n if sense='v' or 'b') + ! (integer workspace: need n+2 ) + if( ilv .or. .not.wantsn ) then + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ztgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, n,& + in, work( iwrk ), rwork,ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + if( .not.wantsn ) then + ! compute eigenvectors (stdlib_ztgevc) and estimate condition + ! numbers (stdlib_ztgsna). note that the definition of the condition + ! number is not invariant under transformation (u,v) to + ! (q*u, z*v), where (u,v) are eigenvectors of the generalized + ! schur form (s,t), q and z are orthogonal matrices. in order + ! to avoid using extra 2*n*n workspace, we have to + ! re-calculate eigenvectors and estimate the condition numbers + ! one at a time. + do i = 1, n + do j = 1, n + bwork( j ) = .false. + end do + bwork( i ) = .true. + iwrk = n + 1 + iwrk1 = iwrk + n + if( wantse .or. wantsb ) then + call stdlib_ztgevc( 'B', 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, 1, m,work( iwrk1 ), rwork, ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 90 + end if + end if + call stdlib_ztgsna( sense, 'S', bwork, n, a, lda, b, ldb,work( 1 ), n, work( & + iwrk ), n, rconde( i ),rcondv( i ), 1, m, work( iwrk1 ),lwork-iwrk1+1, iwork, & + ierr ) + end do + end if + end if + ! undo balancing on vl and vr and normalization + ! (workspace: none needed) + if( ilvl ) then + call stdlib_zggbak( balanc, 'L', n, ilo, ihi, lscale, rscale, n, vl,ldvl, ierr ) + + loop_50: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZHBEV: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. + + subroutine stdlib_zhbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lower, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, iscale + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_zlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_zlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_zhbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + call stdlib_zhbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, call stdlib_zsteqr. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + indrwk = inde + n + call stdlib_zsteqr( jobz, n, w, rwork( inde ), z, ldz,rwork( indrwk ), info ) + + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + return + end subroutine stdlib_zhbev + + !> ZHBEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian band matrix A. If eigenvectors are desired, it + !> uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zhbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work,lwork, rwork, lrwork, & + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: kd, ldab, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indwk2, indwrk, iscale, liwmin, llrwk, llwk2, & + lrwmin, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. liwork==-1 .or. lrwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else + if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( kd<0 ) then + info = -4 + else if( ldabzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_zlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_zlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + end if + ! call stdlib_zhbtrd to reduce hermitian band matrix to tridiagonal form. + inde = 1 + indwrk = inde + n + indwk2 = 1 + n*n + llwk2 = lwork - indwk2 + 1 + llrwk = lrwork - indwrk + 1 + call stdlib_zhbtrd( jobz, uplo, n, kd, ab, ldab, w, rwork( inde ), z,ldz, work, iinfo ) + + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, call stdlib_zstedc. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + call stdlib_zstedc( 'I', n, w, rwork( inde ), work, n, work( indwk2 ),llwk2, rwork( & + indwrk ), llrwk, iwork, liwork,info ) + call stdlib_zgemm( 'N', 'N', n, n, n, cone, z, ldz, work, n, czero,work( indwk2 ), & + n ) + call stdlib_zlacpy( 'A', n, n, work( indwk2 ), n, z, ldz ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_zhbevd + + !> ZHBEVX: computes selected eigenvalues and, optionally, eigenvectors + !> of a complex Hermitian band matrix A. Eigenvalues and eigenvectors + !> can be selected by specifying either a range of values or a range of + !> indices for the desired eigenvalues. + + subroutine stdlib_zhbevx( jobz, range, uplo, n, kd, ab, ldab, q, ldq, vl,vu, il, iu, abstol, & + m, w, z, ldz, work, rwork,iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, kd, ldab, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*) + complex(dp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, lower, test, valeig, wantz + character :: order + integer(ilp) :: i, iinfo, imax, indd, inde, indee, indibl, indisp, indiwk, indrwk, & + indwrk, iscale, itmp1, j, jj, nsplit + real(dp) :: abstll, anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum, tmp1, vll, & + vuu + complex(dp) :: ctmp1 + ! Intrinsic Functions + intrinsic :: real,max,min,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + lower = stdlib_lsame( uplo, 'L' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( kd<0 ) then + info = -5 + else if( ldab0 .and. vu<=vl )info = -11 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -12 + else if( iun ) then + info = -13 + end if + end if + end if + if( info==0 ) then + if( ldz<1 .or. ( wantz .and. ldz=tmp1 ) )m = 0 + end if + if( m==1 ) then + w( 1 ) = real( ctmp1,KIND=dp) + if( wantz )z( 1, 1 ) = cone + end if + return + end if + ! get machine constants. + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + eps = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin / eps + bignum = one / smlnum + rmin = sqrt( smlnum ) + rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) ) + ! scale matrix to allowable range, if necessary. + iscale = 0 + abstll = abstol + if( valeig ) then + vll = vl + vuu = vu + else + vll = zero + vuu = zero + end if + anrm = stdlib_zlanhb( 'M', uplo, n, kd, ab, ldab, rwork ) + if( anrm>zero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + if( lower ) then + call stdlib_zlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info ) + else + call stdlib_zlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info ) + end if + if( abstol>0 )abstll = abstol*sigma + if( valeig ) then + vll = vl*sigma + vuu = vu*sigma + end if + end if + ! call stdlib_zhbtrd to reduce hermitian band matrix to tridiagonal form. + indd = 1 + inde = indd + n + indrwk = inde + n + indwrk = 1 + call stdlib_zhbtrd( jobz, uplo, n, kd, ab, ldab, rwork( indd ),rwork( inde ), q, ldq, & + work( indwrk ), iinfo ) + ! if all eigenvalues are desired and abstol is less than or equal + ! to zero, then call stdlib_dsterf or stdlib_zsteqr. if this fails for some + ! eigenvalue, then try stdlib_dstebz. + test = .false. + if (indeig) then + if (il==1 .and. iu==n) then + test = .true. + end if + end if + if ((alleig .or. test) .and. (abstol<=zero)) then + call stdlib_dcopy( n, rwork( indd ), 1, w, 1 ) + indee = indrwk + 2*n + if( .not.wantz ) then + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_dsterf( n, w, rwork( indee ), info ) + else + call stdlib_zlacpy( 'A', n, n, q, ldq, z, ldz ) + call stdlib_dcopy( n-1, rwork( inde ), 1, rwork( indee ), 1 ) + call stdlib_zsteqr( jobz, n, w, rwork( indee ), z, ldz,rwork( indrwk ), info ) + + if( info==0 ) then + do i = 1, n + ifail( i ) = 0 + end do + end if + end if + if( info==0 ) then + m = n + go to 30 + end if + info = 0 + end if + ! otherwise, call stdlib_dstebz and, if eigenvectors are desired, stdlib_zstein. + if( wantz ) then + order = 'B' + else + order = 'E' + end if + indibl = 1 + indisp = indibl + n + indiwk = indisp + n + call stdlib_dstebz( range, order, n, vll, vuu, il, iu, abstll,rwork( indd ), rwork( & + inde ), m, nsplit, w,iwork( indibl ), iwork( indisp ), rwork( indrwk ),iwork( indiwk ),& + info ) + if( wantz ) then + call stdlib_zstein( n, rwork( indd ), rwork( inde ), m, w,iwork( indibl ), iwork( & + indisp ), z, ldz,rwork( indrwk ), iwork( indiwk ), ifail, info ) + ! apply unitary matrix used in reduction to tridiagonal + ! form to eigenvectors returned by stdlib_zstein. + do j = 1, m + call stdlib_zcopy( n, z( 1, j ), 1, work( 1 ), 1 ) + call stdlib_zgemv( 'N', n, n, cone, q, ldq, work, 1, czero,z( 1, j ), 1 ) + end do + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + 30 continue + if( iscale==1 ) then + if( info==0 ) then + imax = m + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + ! if eigenvalues are not in order, then sort them, along with + ! eigenvectors. + if( wantz ) then + do j = 1, m - 1 + i = 0 + tmp1 = w( j ) + do jj = j + 1, m + if( w( jj ) ZHBGV: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. + + pure subroutine stdlib_zhbgv( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w, z,ldz, work, & + rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, n + ! Array Arguments + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwrk + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab ZHBGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + pure subroutine stdlib_zhbgvd( jobz, uplo, n, ka, kb, ab, ldab, bb, ldbb, w,z, ldz, work, & + lwork, rwork, lrwork, iwork,liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ka, kb, ldab, ldbb, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: vect + integer(ilp) :: iinfo, inde, indwk2, indwrk, liwmin, llrwk, llwk2, lrwmin, & + lwmin + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1+n + lrwmin = 1+n + liwmin = 1 + else if( wantz ) then + lwmin = 2*n**2 + lrwmin = 1 + 5*n + 2*n**2 + liwmin = 3 + 5*n + else + lwmin = n + lrwmin = n + liwmin = 1 + end if + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ka<0 ) then + info = -4 + else if( kb<0 .or. kb>ka ) then + info = -5 + else if( ldab ZHBGVX: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite banded eigenproblem, of + !> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian + !> and banded, and B is also positive definite. Eigenvalues and + !> eigenvectors can be selected by specifying either all eigenvalues, + !> a range of values or a range of indices for the desired eigenvalues. + + pure subroutine stdlib_zhbgvx( jobz, range, uplo, n, ka, kb, ab, ldab, bb,ldbb, q, ldq, vl, & + vu, il, iu, abstol, m, w, z,ldz, work, rwork, iwork, ifail, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, range, uplo + integer(ilp), intent(in) :: il, iu, ka, kb, ldab, ldbb, ldq, ldz, n + integer(ilp), intent(out) :: info, m + real(dp), intent(in) :: abstol, vl, vu + ! Array Arguments + integer(ilp), intent(out) :: ifail(*), iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ab(ldab,*), bb(ldbb,*) + complex(dp), intent(out) :: q(ldq,*), work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: alleig, indeig, test, upper, valeig, wantz + character :: order, vect + integer(ilp) :: i, iinfo, indd, inde, indee, indibl, indisp, indiwk, indrwk, indwrk, & + itmp1, j, jj, nsplit + real(dp) :: tmp1 + ! Intrinsic Functions + intrinsic :: min + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + alleig = stdlib_lsame( range, 'A' ) + valeig = stdlib_lsame( range, 'V' ) + indeig = stdlib_lsame( range, 'I' ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( alleig .or. valeig .or. indeig ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ka<0 ) then + info = -5 + else if( kb<0 .or. kb>ka ) then + info = -6 + else if( ldab0 .and. vu<=vl )info = -14 + else if( indeig ) then + if( il<1 .or. il>max( 1, n ) ) then + info = -15 + else if ( iun ) then + info = -16 + end if + end if + end if + if( info==0) then + if( ldz<1 .or. ( wantz .and. ldz ZHEEVD: computes all eigenvalues and, optionally, eigenvectors of a + !> complex Hermitian matrix A. If eigenvectors are desired, it uses a + !> divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zheevd( jobz, uplo, n, a, lda, w, work, lwork, rwork,lrwork, iwork, liwork,& + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lower, lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwk2, indwrk, iscale, liopt, & + liwmin, llrwk, llwork, llwrk2, lopt, lropt, lrwmin, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lower = stdlib_lsame( uplo, 'L' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( lower .or. stdlib_lsame( uplo, 'U' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 )call stdlib_zlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info ) + ! call stdlib_zhetrd to reduce hermitian matrix to tridiagonal form. + inde = 1 + indtau = 1 + indwrk = indtau + n + indrwk = inde + n + indwk2 = indwrk + n*n + llwork = lwork - indwrk + 1 + llwrk2 = lwork - indwk2 + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_zhetrd( uplo, n, a, lda, w, rwork( inde ), work( indtau ),work( indwrk ), & + llwork, iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_zstedc to generate the eigenvector matrix, work(indwrk), of the + ! tridiagonal matrix, then call stdlib_zunmtr to multiply it to the + ! householder transformations represented as householder vectors in + ! a. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + call stdlib_zstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,work( indwk2 ), & + llwrk2, rwork( indrwk ), llrwk,iwork, liwork, info ) + call stdlib_zunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),work( indwrk ), n, & + work( indwk2 ), llwrk2, iinfo ) + call stdlib_zlacpy( 'A', n, n, work( indwrk ), n, a, lda ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lopt + rwork( 1 ) = lropt + iwork( 1 ) = liopt + return + end subroutine stdlib_zheevd + + !> ZHEGVD: computes all the eigenvalues, and optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian and B is also positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zhegvd( itype, jobz, uplo, n, a, lda, b, ldb, w, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, lda, ldb, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: liopt, liwmin, lopt, lropt, lrwmin, lwmin + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( n<=1 ) then + lwmin = 1 + lrwmin = 1 + liwmin = 1 + else if( wantz ) then + lwmin = 2*n + n*n + lrwmin = 1 + 5*n + 2*n*n + liwmin = 3 + 5*n + else + lwmin = n + 1 + lrwmin = n + liwmin = 1 + end if + lopt = lwmin + lropt = lrwmin + liopt = liwmin + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( lda ZHPEVD: computes all the eigenvalues and, optionally, eigenvectors of + !> a complex Hermitian matrix A in packed storage. If eigenvectors are + !> desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zhpevd( jobz, uplo, n, ap, w, z, ldz, work, lwork,rwork, lrwork, iwork, & + liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: lquery, wantz + integer(ilp) :: iinfo, imax, inde, indrwk, indtau, indwrk, iscale, liwmin, llrwk, & + llwrk, lrwmin, lwmin + real(dp) :: anrm, bignum, eps, rmax, rmin, safmin, sigma, smlnum + ! Intrinsic Functions + intrinsic :: sqrt + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -1 + else if( .not.( stdlib_lsame( uplo, 'L' ) .or. stdlib_lsame( uplo, 'U' ) ) )& + then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldz<1 .or. ( wantz .and. ldzzero .and. anrmrmax ) then + iscale = 1 + sigma = rmax / anrm + end if + if( iscale==1 ) then + call stdlib_zdscal( ( n*( n+1 ) ) / 2, sigma, ap, 1 ) + end if + ! call stdlib_zhptrd to reduce hermitian packed matrix to tridiagonal form. + inde = 1 + indtau = 1 + indrwk = inde + n + indwrk = indtau + n + llwrk = lwork - indwrk + 1 + llrwk = lrwork - indrwk + 1 + call stdlib_zhptrd( uplo, n, ap, w, rwork( inde ), work( indtau ),iinfo ) + ! for eigenvalues only, call stdlib_dsterf. for eigenvectors, first call + ! stdlib_zupgtr to generate the orthogonal matrix, then call stdlib_zstedc. + if( .not.wantz ) then + call stdlib_dsterf( n, w, rwork( inde ), info ) + else + call stdlib_zstedc( 'I', n, w, rwork( inde ), z, ldz, work( indwrk ),llwrk, rwork( & + indrwk ), llrwk, iwork, liwork,info ) + call stdlib_zupmtr( 'L', uplo, 'N', n, n, ap, work( indtau ), z, ldz,work( indwrk ),& + iinfo ) + end if + ! if matrix was scaled, then rescale eigenvalues appropriately. + if( iscale==1 ) then + if( info==0 ) then + imax = n + else + imax = info - 1 + end if + call stdlib_dscal( imax, one / sigma, w, 1 ) + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_zhpevd + + !> ZHPGVD: computes all the eigenvalues and, optionally, the eigenvectors + !> of a complex generalized Hermitian-definite eigenproblem, of the form + !> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and + !> B are assumed to be Hermitian, stored in packed format, and B is also + !> positive definite. + !> If eigenvectors are desired, it uses a divide and conquer algorithm. + !> The divide and conquer algorithm makes very mild assumptions about + !> floating point arithmetic. It will work on machines with a guard + !> digit in add/subtract, or on those binary machines without guard + !> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + !> Cray-2. It could conceivably fail on hexadecimal or decimal machines + !> without guard digits, but we know of none. + + subroutine stdlib_zhpgvd( itype, jobz, uplo, n, ap, bp, w, z, ldz, work,lwork, rwork, lrwork,& + iwork, liwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobz, uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: itype, ldz, liwork, lrwork, lwork, n + ! Array Arguments + integer(ilp), intent(out) :: iwork(*) + real(dp), intent(out) :: rwork(*), w(*) + complex(dp), intent(inout) :: ap(*), bp(*) + complex(dp), intent(out) :: work(*), z(ldz,*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery, upper, wantz + character :: trans + integer(ilp) :: j, liwmin, lrwmin, lwmin, neig + ! Intrinsic Functions + intrinsic :: real,max + ! Executable Statements + ! test the input parameters. + wantz = stdlib_lsame( jobz, 'V' ) + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 .or. lrwork==-1 .or. liwork==-1 ) + info = 0 + if( itype<1 .or. itype>3 ) then + info = -1 + else if( .not.( wantz .or. stdlib_lsame( jobz, 'N' ) ) ) then + info = -2 + else if( .not.( upper .or. stdlib_lsame( uplo, 'L' ) ) ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ldz<1 .or. ( wantz .and. ldz0 )neig = info - 1 + if( itype==1 .or. itype==2 ) then + ! for a*x=(lambda)*b*x and a*b*x=(lambda)*x; + ! backtransform eigenvectors: x = inv(l)**h *y or inv(u)*y + if( upper ) then + trans = 'N' + else + trans = 'C' + end if + do j = 1, neig + call stdlib_ztpsv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + else if( itype==3 ) then + ! for b*a*x=(lambda)*x; + ! backtransform eigenvectors: x = l*y or u**h *y + if( upper ) then + trans = 'C' + else + trans = 'N' + end if + do j = 1, neig + call stdlib_ztpmv( uplo, trans, 'NON-UNIT', n, bp, z( 1, j ),1 ) + end do + end if + end if + work( 1 ) = lwmin + rwork( 1 ) = lrwmin + iwork( 1 ) = liwmin + return + end subroutine stdlib_zhpgvd + + !> ZGEES: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left. + !> The leading columns of Z then form an orthonormal basis for the + !> invariant subspace corresponding to the selected eigenvalues. + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_zgees( jobvs, sort, select, n, a, lda, sdim, w, vs,ldvs, work, lwork, & + rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_z) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantst, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, maxwrk, & + minwrk + real(dp) :: anrm, bignum, cscale, eps, s, sep, smlnum + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -4 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_zlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_zgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_zgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_zlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_zlascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues and transform schur vectors + ! (cworkspace: none) + ! (rworkspace: none) + call stdlib_ztrsen( 'N', jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,s, sep, work( & + iwrk ), lwork-iwrk+1, icond ) + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_zgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_zlascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_zcopy( n, a, lda+1, w, 1 ) + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_zgees + + !> ZGEESX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues, the Schur form T, and, optionally, the matrix of Schur + !> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). + !> Optionally, it also orders the eigenvalues on the diagonal of the + !> Schur form so that selected eigenvalues are at the top left; + !> computes a reciprocal condition number for the average of the + !> selected eigenvalues (RCONDE); and computes a reciprocal condition + !> number for the right invariant subspace corresponding to the + !> selected eigenvalues (RCONDV). The leading columns of Z form an + !> orthonormal basis for this invariant subspace. + !> For further explanation of the reciprocal condition numbers RCONDE + !> and RCONDV, see Section 4.10_dp of the LAPACK Users' Guide (where + !> these quantities are called s and sep respectively). + !> A complex matrix is in Schur form if it is upper triangular. + + subroutine stdlib_zgeesx( jobvs, sort, select, sense, n, a, lda, sdim, w,vs, ldvs, rconde, & + rcondv, work, lwork, rwork,bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvs, sense, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldvs, lwork, n + real(dp), intent(out) :: rconde, rcondv + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vs(ldvs,*), w(*), work(*) + ! Function Arguments + procedure(stdlib_select_z) :: select + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantsb, wantse, wantsn, wantst, wantsv, wantvs + integer(ilp) :: hswork, i, ibal, icond, ierr, ieval, ihi, ilo, itau, iwrk, lwrk, & + maxwrk, minwrk + real(dp) :: anrm, bignum, cscale, eps, smlnum + ! Local Arrays + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + wantvs = stdlib_lsame( jobvs, 'V' ) + wantst = stdlib_lsame( sort, 'S' ) + wantsn = stdlib_lsame( sense, 'N' ) + wantse = stdlib_lsame( sense, 'E' ) + wantsv = stdlib_lsame( sense, 'V' ) + wantsb = stdlib_lsame( sense, 'B' ) + lquery = ( lwork==-1 ) + if( ( .not.wantvs ) .and. ( .not.stdlib_lsame( jobvs, 'N' ) ) ) then + info = -1 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -2 + else if( .not.( wantsn .or. wantse .or. wantsv .or. wantsb ) .or.( .not.wantst .and. & + .not.wantsn ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_zlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! permute the matrix to make it more nearly triangular + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_zgebal( 'P', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = n + itau + call stdlib_zgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvs ) then + ! copy householder vectors to vs + call stdlib_zlacpy( 'L', n, n, a, lda, vs, ldvs ) + ! generate unitary matrix in vs + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vs, ldvs, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + end if + sdim = 0 + ! perform qr iteration, accumulating schur vectors in vs if desired + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', jobvs, n, ilo, ihi, a, lda, w, vs, ldvs,work( iwrk ), lwork-& + iwrk+1, ieval ) + if( ieval>0 )info = ieval + ! sort eigenvalues if desired + if( wantst .and. info==0 ) then + if( scalea )call stdlib_zlascl( 'G', 0, 0, cscale, anrm, n, 1, w, n, ierr ) + do i = 1, n + bwork( i ) = select( w( i ) ) + end do + ! reorder eigenvalues, transform schur vectors, and compute + ! reciprocal condition numbers + ! (cworkspace: if sense is not 'n', need 2*sdim*(n-sdim) + ! otherwise, need none ) + ! (rworkspace: none) + call stdlib_ztrsen( sense, jobvs, bwork, n, a, lda, vs, ldvs, w, sdim,rconde, & + rcondv, work( iwrk ), lwork-iwrk+1,icond ) + if( .not.wantsn )maxwrk = max( maxwrk, 2*sdim*( n-sdim ) ) + if( icond==-14 ) then + ! not enough complex workspace + info = -15 + end if + end if + if( wantvs ) then + ! undo balancing + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_zgebak( 'P', 'R', n, ilo, ihi, rwork( ibal ), n, vs, ldvs,ierr ) + end if + if( scalea ) then + ! undo scaling for the schur form of a + call stdlib_zlascl( 'U', 0, 0, cscale, anrm, n, n, a, lda, ierr ) + call stdlib_zcopy( n, a, lda+1, w, 1 ) + if( ( wantsv .or. wantsb ) .and. info==0 ) then + dum( 1 ) = rcondv + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + rcondv = dum( 1 ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_zgeesx + + !> ZGEEV: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + + subroutine stdlib_zgeev( jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr,work, lwork, rwork, & + info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr + character :: side + integer(ilp) :: hswork, i, ibal, ierr, ihi, ilo, irwork, itau, iwrk, k, lwork_trevc, & + maxwrk, minwrk, nout + real(dp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(dp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -1 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_zlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix + ! (cworkspace: none) + ! (rworkspace: need n) + ibal = 1 + call stdlib_zgebal( 'B', n, a, lda, ilo, ihi, rwork( ibal ), ierr ) + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_zgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_zlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_zlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_zlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'E', 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_zhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need 2*n) + irwork = ibal + n + call stdlib_ztrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork( irwork ), n, ierr ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_zgebak( 'B', 'L', n, ilo, ihi, rwork( ibal ), n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_dznrm2( n, vl( 1, i ), 1 ) + call stdlib_zdscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vl( k, i ),KIND=dp)**2 +aimag( vl( k, i ) )& + **2 + end do + k = stdlib_idamax( n, rwork( irwork ), 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_zscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=dp), zero,KIND=dp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + ! (cworkspace: none) + ! (rworkspace: need n) + call stdlib_zgebak( 'B', 'R', n, ilo, ihi, rwork( ibal ), n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_dznrm2( n, vr( 1, i ), 1 ) + call stdlib_zdscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( irwork+k-1 ) = real( vr( k, i ),KIND=dp)**2 +aimag( vr( k, i ) )& + **2 + end do + k = stdlib_idamax( n, rwork( irwork ), 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( irwork+k-1 ) ) + call stdlib_zscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=dp), zero,KIND=dp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_zlascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info>0 ) then + call stdlib_zlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_zgeev + + !> ZGEEVX: computes for an N-by-N complex nonsymmetric matrix A, the + !> eigenvalues and, optionally, the left and/or right eigenvectors. + !> Optionally also, it computes a balancing transformation to improve + !> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, + !> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues + !> (RCONDE), and reciprocal condition numbers for the right + !> eigenvectors (RCONDV). + !> The right eigenvector v(j) of A satisfies + !> A * v(j) = lambda(j) * v(j) + !> where lambda(j) is its eigenvalue. + !> The left eigenvector u(j) of A satisfies + !> u(j)**H * A = lambda(j) * u(j)**H + !> where u(j)**H denotes the conjugate transpose of u(j). + !> The computed eigenvectors are normalized to have Euclidean norm + !> equal to 1 and largest component real. + !> Balancing a matrix means permuting the rows and columns to make it + !> more nearly upper triangular, and applying a diagonal similarity + !> transformation D * A * D**(-1), where D is a diagonal matrix, to + !> make its rows and columns closer in norm and the condition numbers + !> of its eigenvalues and eigenvectors smaller. The computed + !> reciprocal condition numbers correspond to the balanced matrix. + !> Permuting rows and columns will not change the condition numbers + !> (in exact arithmetic) but diagonal scaling will. For further + !> explanation of balancing, see section 4.10.2_dp of the LAPACK + !> Users' Guide. + + subroutine stdlib_zgeevx( balanc, jobvl, jobvr, sense, n, a, lda, w, vl,ldvl, vr, ldvr, ilo, & + ihi, scale, abnrm, rconde,rcondv, work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: balanc, jobvl, jobvr, sense + integer(ilp), intent(out) :: ihi, ilo, info + integer(ilp), intent(in) :: lda, ldvl, ldvr, lwork, n + real(dp), intent(out) :: abnrm + ! Array Arguments + real(dp), intent(out) :: rconde(*), rcondv(*), rwork(*), scale(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: vl(ldvl,*), vr(ldvr,*), w(*), work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, scalea, wantvl, wantvr, wntsnb, wntsne, wntsnn, wntsnv + character :: job, side + integer(ilp) :: hswork, i, icond, ierr, itau, iwrk, k, lwork_trevc, maxwrk, minwrk, & + nout + real(dp) :: anrm, bignum, cscale, eps, scl, smlnum + complex(dp) :: tmp + ! Local Arrays + logical(lk) :: select(1) + real(dp) :: dum(1) + ! Intrinsic Functions + intrinsic :: real,cmplx,conjg,aimag,max,sqrt + ! Executable Statements + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + wantvl = stdlib_lsame( jobvl, 'V' ) + wantvr = stdlib_lsame( jobvr, 'V' ) + wntsnn = stdlib_lsame( sense, 'N' ) + wntsne = stdlib_lsame( sense, 'E' ) + wntsnv = stdlib_lsame( sense, 'V' ) + wntsnb = stdlib_lsame( sense, 'B' ) + if( .not.( stdlib_lsame( balanc, 'N' ) .or. stdlib_lsame( balanc, 'S' ) & + .or.stdlib_lsame( balanc, 'P' ) .or. stdlib_lsame( balanc, 'B' ) ) ) then + info = -1 + else if( ( .not.wantvl ) .and. ( .not.stdlib_lsame( jobvl, 'N' ) ) ) then + info = -2 + else if( ( .not.wantvr ) .and. ( .not.stdlib_lsame( jobvr, 'N' ) ) ) then + info = -3 + else if( .not.( wntsnn .or. wntsne .or. wntsnb .or. wntsnv ) .or.( ( wntsne .or. & + wntsnb ) .and. .not.( wantvl .and.wantvr ) ) ) then + info = -4 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + scalea = .true. + cscale = bignum + end if + if( scalea )call stdlib_zlascl( 'G', 0, 0, anrm, cscale, n, n, a, lda, ierr ) + ! balance the matrix and compute abnrm + call stdlib_zgebal( balanc, n, a, lda, ilo, ihi, scale, ierr ) + abnrm = stdlib_zlange( '1', n, n, a, lda, dum ) + if( scalea ) then + dum( 1 ) = abnrm + call stdlib_dlascl( 'G', 0, 0, cscale, anrm, 1, 1, dum, 1, ierr ) + abnrm = dum( 1 ) + end if + ! reduce to upper hessenberg form + ! (cworkspace: need 2*n, prefer n+n*nb) + ! (rworkspace: none) + itau = 1 + iwrk = itau + n + call stdlib_zgehrd( n, ilo, ihi, a, lda, work( itau ), work( iwrk ),lwork-iwrk+1, ierr & + ) + if( wantvl ) then + ! want left eigenvectors + ! copy householder vectors to vl + side = 'L' + call stdlib_zlacpy( 'L', n, n, a, lda, vl, ldvl ) + ! generate unitary matrix in vl + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vl, ldvl, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vl + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vl, ldvl,work( iwrk ), lwork-& + iwrk+1, info ) + if( wantvr ) then + ! want left and right eigenvectors + ! copy schur vectors to vr + side = 'B' + call stdlib_zlacpy( 'F', n, n, vl, ldvl, vr, ldvr ) + end if + else if( wantvr ) then + ! want right eigenvectors + ! copy householder vectors to vr + side = 'R' + call stdlib_zlacpy( 'L', n, n, a, lda, vr, ldvr ) + ! generate unitary matrix in vr + ! (cworkspace: need 2*n-1, prefer n+(n-1)*nb) + ! (rworkspace: none) + call stdlib_zunghr( n, ilo, ihi, vr, ldvr, work( itau ), work( iwrk ),lwork-iwrk+1, & + ierr ) + ! perform qr iteration, accumulating schur vectors in vr + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( 'S', 'V', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + else + ! compute eigenvalues only + ! if condition numbers desired, compute schur form + if( wntsnn ) then + job = 'E' + else + job = 'S' + end if + ! (cworkspace: need 1, prefer hswork (see comments) ) + ! (rworkspace: none) + iwrk = itau + call stdlib_zhseqr( job, 'N', n, ilo, ihi, a, lda, w, vr, ldvr,work( iwrk ), lwork-& + iwrk+1, info ) + end if + ! if info /= 0 from stdlib_zhseqr, then quit + if( info/=0 )go to 50 + if( wantvl .or. wantvr ) then + ! compute left and/or right eigenvectors + ! (cworkspace: need 2*n, prefer n + 2*n*nb) + ! (rworkspace: need n) + call stdlib_ztrevc3( side, 'B', select, n, a, lda, vl, ldvl, vr, ldvr,n, nout, work(& + iwrk ), lwork-iwrk+1,rwork, n, ierr ) + end if + ! compute condition numbers if desired + ! (cworkspace: need n*n+2*n unless sense = 'e') + ! (rworkspace: need 2*n unless sense = 'e') + if( .not.wntsnn ) then + call stdlib_ztrsna( sense, 'A', select, n, a, lda, vl, ldvl, vr, ldvr,rconde, & + rcondv, n, nout, work( iwrk ), n, rwork,icond ) + end if + if( wantvl ) then + ! undo balancing of left eigenvectors + call stdlib_zgebak( balanc, 'L', n, ilo, ihi, scale, n, vl, ldvl,ierr ) + ! normalize left eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_dznrm2( n, vl( 1, i ), 1 ) + call stdlib_zdscal( n, scl, vl( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vl( k, i ),KIND=dp)**2 +aimag( vl( k, i ) )**2 + end do + k = stdlib_idamax( n, rwork, 1 ) + tmp = conjg( vl( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_zscal( n, tmp, vl( 1, i ), 1 ) + vl( k, i ) = cmplx( real( vl( k, i ),KIND=dp), zero,KIND=dp) + end do + end if + if( wantvr ) then + ! undo balancing of right eigenvectors + call stdlib_zgebak( balanc, 'R', n, ilo, ihi, scale, n, vr, ldvr,ierr ) + ! normalize right eigenvectors and make largest component real + do i = 1, n + scl = one / stdlib_dznrm2( n, vr( 1, i ), 1 ) + call stdlib_zdscal( n, scl, vr( 1, i ), 1 ) + do k = 1, n + rwork( k ) = real( vr( k, i ),KIND=dp)**2 +aimag( vr( k, i ) )**2 + end do + k = stdlib_idamax( n, rwork, 1 ) + tmp = conjg( vr( k, i ) ) / sqrt( rwork( k ) ) + call stdlib_zscal( n, tmp, vr( 1, i ), 1 ) + vr( k, i ) = cmplx( real( vr( k, i ),KIND=dp), zero,KIND=dp) + end do + end if + ! undo scaling if necessary + 50 continue + if( scalea ) then + call stdlib_zlascl( 'G', 0, 0, cscale, anrm, n-info, 1, w( info+1 ),max( n-info, 1 )& + , ierr ) + if( info==0 ) then + if( ( wntsnv .or. wntsnb ) .and. icond==0 )call stdlib_dlascl( 'G', 0, 0, cscale,& + anrm, n, 1, rcondv, n,ierr ) + else + call stdlib_zlascl( 'G', 0, 0, cscale, anrm, ilo-1, 1, w, n, ierr ) + end if + end if + work( 1 ) = maxwrk + return + end subroutine stdlib_zgeevx + + !> ZGEJSV: computes the singular value decomposition (SVD) of a complex M-by-N + !> matrix [A], where M >= N. The SVD of [A] is written as + !> [A] = [U] * [SIGMA] * [V]^*, + !> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N + !> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and + !> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are + !> the singular values of [A]. The columns of [U] and [V] are the left and + !> the right singular vectors of [A], respectively. The matrices [U] and [V] + !> are computed and stored in the arrays U and V, respectively. The diagonal + !> of [SIGMA] is computed and stored in the array SVA. + + pure subroutine stdlib_zgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & + v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) + real(dp), intent(out) :: sva(n), rwork(lrwork) + integer(ilp), intent(out) :: iwork(*) + character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv + ! =========================================================================== + + + ! Local Scalars + complex(dp) :: ctemp + real(dp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & + entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc + integer(ilp) :: ierr, n1, nr, numrank, p, q, warning + logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & + l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp + integer(ilp) :: optwrk, minwrk, minrwrk, miniwrk + integer(ilp) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & + lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff + integer(ilp) :: lwrk_zgelqf, lwrk_zgeqp3, lwrk_zgeqp3n, lwrk_zgeqrf, lwrk_zgesvj, & + lwrk_zgesvjv, lwrk_zgesvju, lwrk_zunmlq, lwrk_zunmqr, lwrk_zunmqrm + ! Local Arrays + complex(dp) :: cdummy(1) + real(dp) :: rdummy(1) + ! Intrinsic Functions + intrinsic :: abs,cmplx,conjg,log,max,min,real,nint,sqrt + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + jracc = stdlib_lsame( jobv, 'J' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc + rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) + l2rank = stdlib_lsame( joba, 'R' ) + l2aber = stdlib_lsame( joba, 'A' ) + errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) + l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) + l2kill = stdlib_lsame( jobr, 'R' ) + defr = stdlib_lsame( jobr, 'N' ) + l2pert = stdlib_lsame( jobp, 'P' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & + then + info = - 1 + else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & + .and. rsvec .and. l2tran ) ) ) then + info = - 2 + else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & + .and. lsvec .and. l2tran ) ) ) then + info = - 3 + else if ( .not. ( l2kill .or. defr ) ) then + info = - 4 + else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then + info = - 5 + else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then + info = - 6 + else if ( m < 0 ) then + info = - 7 + else if ( ( n < 0 ) .or. ( n > m ) ) then + info = - 8 + else if ( lda < m ) then + info = - 10 + else if ( lsvec .and. ( ldu < m ) ) then + info = - 13 + else if ( rsvec .and. ( ldv < n ) ) then + info = - 15 + else + ! #:) + info = 0 + end if + if ( info == 0 ) then + ! Compute The Minimal And The Optimal Workspace Lengths + ! [[the expressions for computing the minimal and the optimal + ! values of lcwork, lrwork are written with a lot of redundancy and + ! can be simplified. however, this verbose form is useful for + ! maintenance and modifications of the code.]] + ! .. minimal workspace length for stdlib_zgeqp3 of an m x n matrix, + ! stdlib_zgeqrf of an n x n matrix, stdlib_zgelqf of an n x n matrix, + ! stdlib_zunmlq for computing n x n matrix, stdlib_zunmqr for computing n x n + ! matrix, stdlib_zunmqr for computing m x n matrix, respectively. + lwqp3 = n+1 + lwqrf = max( 1, n ) + lwlqf = max( 1, n ) + lwunmlq = max( 1, n ) + lwunmqr = max( 1, n ) + lwunmqrm = max( 1, m ) + ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix + lwcon = 2 * n + ! .. minimal workspace length for stdlib_zgesvj of an n x n matrix, + ! without and with explicit accumulation of jacobi rotations + lwsvdj = max( 2 * n, 1 ) + lwsvdjv = max( 2 * n, 1 ) + ! .. minimal real workspace length for stdlib_zgeqp3, stdlib_zpocon, stdlib_zgesvj + lrwqp3 = 2 * n + lrwcon = n + lrwsvdj = n + if ( lquery ) then + call stdlib_zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,rdummy, ierr ) + + lwrk_zgeqp3 = real( cdummy(1),KIND=dp) + call stdlib_zgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_zgeqrf = real( cdummy(1),KIND=dp) + call stdlib_zgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr ) + lwrk_zgelqf = real( cdummy(1),KIND=dp) + end if + minwrk = 2 + optwrk = 2 + miniwrk = n + if ( .not. (lsvec .or. rsvec ) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If + ! only the singular values are requested + if ( errest ) then + minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj ) + else + minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) + end if + if ( lquery ) then + call stdlib_zgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1,& + rdummy, -1, ierr ) + lwrk_zgesvj = real( cdummy(1),KIND=dp) + if ( errest ) then + optwrk = max( n+lwrk_zgeqp3, n**2+lwcon,n+lwrk_zgeqrf, lwrk_zgesvj ) + + else + optwrk = max( n+lwrk_zgeqp3, n+lwrk_zgeqrf,lwrk_zgesvj ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( rsvec .and. (.not.lsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the right singular vectors are requested + if ( errest ) then + minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2*n+lwqrf, n+lwsvdj, n+& + lwunmlq ) + else + minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,n+lwsvdj, n+lwunmlq ) + + end if + if ( lquery ) then + call stdlib_zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_zgesvj = real( cdummy(1),KIND=dp) + call stdlib_zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1, & + ierr ) + lwrk_zunmlq = real( cdummy(1),KIND=dp) + if ( errest ) then + optwrk = max( n+lwrk_zgeqp3, lwcon, lwrk_zgesvj,n+lwrk_zgelqf, 2*n+& + lwrk_zgeqrf,n+lwrk_zgesvj, n+lwrk_zunmlq ) + else + optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvj,n+lwrk_zgelqf,2*n+lwrk_zgeqrf, n+& + lwrk_zgesvj,n+lwrk_zunmlq ) + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else if ( lsvec .and. (.not.rsvec) ) then + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! singular values and the left singular vectors are requested + if ( errest ) then + minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) + else + minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) + end if + if ( lquery ) then + call stdlib_zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1, & + rdummy, -1, ierr ) + lwrk_zgesvj = real( cdummy(1),KIND=dp) + call stdlib_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_zunmqrm = real( cdummy(1),KIND=dp) + if ( errest ) then + optwrk = n + max( lwrk_zgeqp3, lwcon, n+lwrk_zgeqrf,lwrk_zgesvj, & + lwrk_zunmqrm ) + else + optwrk = n + max( lwrk_zgeqp3, n+lwrk_zgeqrf,lwrk_zgesvj, lwrk_zunmqrm ) + + end if + end if + if ( l2tran .or. rowpiv ) then + if ( errest ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj ) + end if + else + if ( errest ) then + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj ) + end if + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + ! Minimal And Optimal Sizes Of The Complex Workspace If The + ! full svd is requested + if ( .not. jracc ) then + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, n+& + lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+n**2+lwcon,2*n+lwqrf, 2*n+& + lwqp3,2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,2*n+n**2+n+lwsvdj, 2*n+& + n**2+n+lwsvdjv,2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,n+n**2+lwsvdj, & + n+lwunmqrm ) + end if + miniwrk = miniwrk + n + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + else + if ( errest ) then + minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+& + lwunmqr,n+lwunmqrm ) + else + minwrk = max( n+lwqp3, 2*n+lwqrf,2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,n+& + lwunmqrm ) + end if + if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m + end if + if ( lquery ) then + call stdlib_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_zunmqrm = real( cdummy(1),KIND=dp) + call stdlib_zunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1, & + ierr ) + lwrk_zunmqr = real( cdummy(1),KIND=dp) + if ( .not. jracc ) then + call stdlib_zgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,rdummy, ierr ) + + lwrk_zgeqp3n = real( cdummy(1),KIND=dp) + call stdlib_zgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_zgesvj = real( cdummy(1),KIND=dp) + call stdlib_zgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_zgesvju = real( cdummy(1),KIND=dp) + call stdlib_zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_zgesvjv = real( cdummy(1),KIND=dp) + call stdlib_zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& + 1, ierr ) + lwrk_zunmlq = real( cdummy(1),KIND=dp) + if ( errest ) then + optwrk = max( n+lwrk_zgeqp3, n+lwcon,2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,& + 2*n+lwrk_zgeqp3n,2*n+n**2+n+lwrk_zgelqf,2*n+n**2+n+n**2+lwcon,2*n+& + n**2+n+lwrk_zgesvj,2*n+n**2+n+lwrk_zgesvjv,2*n+n**2+n+lwrk_zunmqr,2*n+& + n**2+n+lwrk_zunmlq,n+n**2+lwrk_zgesvju,n+lwrk_zunmqrm ) + else + optwrk = max( n+lwrk_zgeqp3,2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,2*n+& + lwrk_zgeqp3n,2*n+n**2+n+lwrk_zgelqf,2*n+n**2+n+n**2+lwcon,2*n+n**2+n+& + lwrk_zgesvj,2*n+n**2+n+lwrk_zgesvjv,2*n+n**2+n+lwrk_zunmqr,2*n+n**2+n+& + lwrk_zunmlq,n+n**2+lwrk_zgesvju,n+lwrk_zunmqrm ) + end if + else + call stdlib_zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & + -1, rdummy, -1, ierr ) + lwrk_zgesvjv = real( cdummy(1),KIND=dp) + call stdlib_zunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& + -1, ierr ) + lwrk_zunmqr = real( cdummy(1),KIND=dp) + call stdlib_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& + 1, ierr ) + lwrk_zunmqrm = real( cdummy(1),KIND=dp) + if ( errest ) then + optwrk = max( n+lwrk_zgeqp3, n+lwcon,2*n+lwrk_zgeqrf, 2*n+n**2,2*n+& + n**2+lwrk_zgesvjv,2*n+n**2+n+lwrk_zunmqr,n+lwrk_zunmqrm ) + else + optwrk = max( n+lwrk_zgeqp3, 2*n+lwrk_zgeqrf,2*n+n**2, 2*n+n**2+& + lwrk_zgesvjv,2*n+n**2+n+lwrk_zunmqr,n+lwrk_zunmqrm ) + end if + end if + end if + if ( l2tran .or. rowpiv ) then + minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon ) + else + minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon ) + end if + end if + minwrk = max( 2, minwrk ) + optwrk = max( minwrk, optwrk ) + if ( lwork < minwrk .and. (.not.lquery) ) info = - 17 + if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19 + end if + if ( info /= 0 ) then + ! #:( + call stdlib_xerbla( 'ZGEJSV', - info ) + return + else if ( lquery ) then + cwork(1) = optwrk + cwork(2) = minwrk + rwork(1) = minrwrk + iwork(1) = max( 4, miniwrk ) + return + end if + ! quick return for void matrix (y3k safe) + ! #:) + if ( ( m == 0 ) .or. ( n == 0 ) ) then + iwork(1:4) = 0 + rwork(1:7) = 0 + return + endif + ! determine whether the matrix u should be m x n or m x m + if ( lsvec ) then + n1 = n + if ( stdlib_lsame( jobu, 'F' ) ) n1 = m + end if + ! set numerical parameters + ! ! note: make sure stdlib_dlamch() does not fail on the target architecture. + epsln = stdlib_dlamch('EPSILON') + sfmin = stdlib_dlamch('SAFEMINIMUM') + small = sfmin / epsln + big = stdlib_dlamch('O') + ! big = one / sfmin + ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n + ! (!) if necessary, scale sva() to protect the largest norm from + ! overflow. it is possible that this scaling pushes the smallest + ! column norm left from the underflow threshold (extreme case). + scalem = one / sqrt(real(m,KIND=dp)*real(n,KIND=dp)) + noscal = .true. + goscal = .true. + do p = 1, n + aapp = zero + aaqq = one + call stdlib_zlassq( m, a(1,p), 1, aapp, aaqq ) + if ( aapp > big ) then + info = - 9 + call stdlib_xerbla( 'ZGEJSV', -info ) + return + end if + aaqq = sqrt(aaqq) + if ( ( aapp < (big / aaqq) ) .and. noscal ) then + sva(p) = aapp * aaqq + else + noscal = .false. + sva(p) = aapp * ( aaqq * scalem ) + if ( goscal ) then + goscal = .false. + call stdlib_dscal( p-1, scalem, sva, 1 ) + end if + end if + end do + if ( noscal ) scalem = one + aapp = zero + aaqq = big + do p = 1, n + aapp = max( aapp, sva(p) ) + if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) + end do + ! quick return for zero m x n matrix + ! #:) + if ( aapp == zero ) then + if ( lsvec ) call stdlib_zlaset( 'G', m, n1, czero, cone, u, ldu ) + if ( rsvec ) call stdlib_zlaset( 'G', n, n, czero, cone, v, ldv ) + rwork(1) = one + rwork(2) = one + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + iwork(1) = 0 + iwork(2) = 0 + iwork(3) = 0 + iwork(4) = -1 + return + end if + ! issue warning if denormalized column norms detected. override the + ! high relative accuracy request. issue licence to kill nonzero columns + ! (set them to zero) whose norm is less than sigma_max / big (roughly). + ! #:( + warning = 0 + if ( aaqq <= sfmin ) then + l2rank = .true. + l2kill = .true. + warning = 1 + end if + ! quick return for one-column matrix + ! #:) + if ( n == 1 ) then + if ( lsvec ) then + call stdlib_zlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr ) + call stdlib_zlacpy( 'A', m, 1, a, lda, u, ldu ) + ! computing all m left singular vectors of the m x 1 matrix + if ( n1 /= n ) then + call stdlib_zgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) + call stdlib_zungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) + call stdlib_zcopy( m, a(1,1), 1, u(1,1), 1 ) + end if + end if + if ( rsvec ) then + v(1,1) = cone + end if + if ( sva(1) < (big*scalem) ) then + sva(1) = sva(1) / scalem + scalem = one + end if + rwork(1) = one / scalem + rwork(2) = one + if ( sva(1) /= zero ) then + iwork(1) = 1 + if ( ( sva(1) / scalem) >= sfmin ) then + iwork(2) = 1 + else + iwork(2) = 0 + end if + else + iwork(1) = 0 + iwork(2) = 0 + end if + iwork(3) = 0 + iwork(4) = -1 + if ( errest ) rwork(3) = one + if ( lsvec .and. rsvec ) then + rwork(4) = one + rwork(5) = one + end if + if ( l2tran ) then + rwork(6) = zero + rwork(7) = zero + end if + return + end if + transp = .false. + aatmax = -one + aatmin = big + if ( rowpiv .or. l2tran ) then + ! compute the row norms, needed to determine row pivoting sequence + ! (in the case of heavily row weighted a, row pivoting is strongly + ! advised) and to collect information needed to compare the + ! structures of a * a^* and a^* * a (in the case l2tran==.true.). + if ( l2tran ) then + do p = 1, m + xsc = zero + temp1 = one + call stdlib_zlassq( n, a(p,1), lda, xsc, temp1 ) + ! stdlib_zlassq gets both the ell_2 and the ell_infinity norm + ! in one pass through the vector + rwork(m+p) = xsc * scalem + rwork(p) = xsc * (scalem*sqrt(temp1)) + aatmax = max( aatmax, rwork(p) ) + if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) + end do + else + do p = 1, m + rwork(m+p) = scalem*abs( a(p,stdlib_izamax(n,a(p,1),lda)) ) + aatmax = max( aatmax, rwork(m+p) ) + aatmin = min( aatmin, rwork(m+p) ) + end do + end if + end if + ! for square matrix a try to determine whether a^* would be better + ! input for the preconditioned jacobi svd, with faster convergence. + ! the decision is based on an o(n) function of the vector of column + ! and row norms of a, based on the shannon entropy. this should give + ! the right choice in most cases when the difference actually matters. + ! it may fail and pick the slower converging side. + entra = zero + entrat = zero + if ( l2tran ) then + xsc = zero + temp1 = one + call stdlib_dlassq( n, sva, 1, xsc, temp1 ) + temp1 = one / temp1 + entra = zero + do p = 1, n + big1 = ( ( sva(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entra = entra + big1 * log(big1) + end do + entra = - entra / log(real(n,KIND=dp)) + ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. + ! it is derived from the diagonal of a^* * a. do the same with the + ! diagonal of a * a^*, compute the entropy of the corresponding + ! probability distribution. note that a * a^* and a^* * a have the + ! same trace. + entrat = zero + do p = 1, m + big1 = ( ( rwork(p) / xsc )**2 ) * temp1 + if ( big1 /= zero ) entrat = entrat + big1 * log(big1) + end do + entrat = - entrat / log(real(m,KIND=dp)) + ! analyze the entropies and decide a or a^*. smaller entropy + ! usually means better input for the algorithm. + transp = ( entrat < entra ) + ! if a^* is better than a, take the adjoint of a. this is allowed + ! only for square matrices, m=n. + if ( transp ) then + ! in an optimal implementation, this trivial transpose + ! should be replaced with faster transpose. + do p = 1, n - 1 + a(p,p) = conjg(a(p,p)) + do q = p + 1, n + ctemp = conjg(a(q,p)) + a(q,p) = conjg(a(p,q)) + a(p,q) = ctemp + end do + end do + a(n,n) = conjg(a(n,n)) + do p = 1, n + rwork(m+p) = sva(p) + sva(p) = rwork(p) + ! previously computed row 2-norms are now column 2-norms + ! of the transposed matrix + end do + temp1 = aapp + aapp = aatmax + aatmax = temp1 + temp1 = aaqq + aaqq = aatmin + aatmin = temp1 + kill = lsvec + lsvec = rsvec + rsvec = kill + if ( lsvec ) n1 = n + rowpiv = .true. + end if + end if + ! end if l2tran + ! scale the matrix so that its maximal singular value remains less + ! than sqrt(big) -- the matrix is scaled so that its maximal column + ! has euclidean norm equal to sqrt(big/n). the only reason to keep + ! sqrt(big) instead of big is the fact that stdlib_zgejsv uses lapack and + ! blas routines that, in some implementations, are not capable of + ! working in the full interval [sfmin,big] and that they may provoke + ! overflows in the intermediate results. if the singular values spread + ! from sfmin to big, then stdlib_zgesvj will compute them. so, in that case, + ! one should use stdlib_zgesvj instead of stdlib_zgejsv. + ! >> change in the april 2016 update: allow bigger range, i.e. the + ! largest column is allowed up to big/n and stdlib_zgesvj will do the rest. + big1 = sqrt( big ) + temp1 = sqrt( big / real(n,KIND=dp) ) + ! temp1 = big/real(n,KIND=dp) + call stdlib_dlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr ) + if ( aaqq > (aapp * sfmin) ) then + aaqq = ( aaqq / aapp ) * temp1 + else + aaqq = ( aaqq * temp1 ) / aapp + end if + temp1 = temp1 * scalem + call stdlib_zlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr ) + ! to undo scaling at the end of this procedure, multiply the + ! computed singular values with uscal2 / uscal1. + uscal1 = temp1 + uscal2 = aapp + if ( l2kill ) then + ! l2kill enforces computation of nonzero singular values in + ! the restricted range of condition number of the initial a, + ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). + xsc = sqrt( sfmin ) + else + xsc = small + ! now, if the condition number of a is too big, + ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, + ! as a precaution measure, the full svd is computed using stdlib_zgesvj + ! with accumulated jacobi rotations. this provides numerically + ! more robust computation, at the cost of slightly increased run + ! time. depending on the concrete implementation of blas and lapack + ! (i.e. how they behave in presence of extreme ill-conditioning) the + ! implementor may decide to remove this switch. + if ( ( aaqq= (temp1*abs(a(1,1))) ) then + nr = nr + 1 + else + go to 3002 + end if + end do + 3002 continue + else if ( l2rank ) then + ! .. similarly as above, only slightly more gentle (less aggressive). + ! sudden drop on the diagonal of r1 is used as the criterion for + ! close-to-rank-deficient. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & + l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 + nr = nr + 1 + end do + 3402 continue + else + ! the goal is high relative accuracy. however, if the matrix + ! has high scaled condition number the relative accuracy is in + ! general not feasible. later on, a condition number estimator + ! will be deployed to estimate the scaled condition number. + ! here we just remove the underflowed part of the triangular + ! factor. this prevents the situation in which the code is + ! working hard to get the accuracy not warranted by the data. + temp1 = sqrt(sfmin) + do p = 2, n + if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to & + 3302 + nr = nr + 1 + end do + 3302 continue + end if + almort = .false. + if ( nr == n ) then + maxprj = one + do p = 2, n + temp1 = abs(a(p,p)) / sva(iwork(p)) + maxprj = min( maxprj, temp1 ) + end do + if ( maxprj**2 >= one - real(n,KIND=dp)*epsln ) almort = .true. + end if + sconda = - one + condr1 = - one + condr2 = - one + if ( errest ) then + if ( n == nr ) then + if ( rsvec ) then + ! V Is Available As Workspace + call stdlib_zlacpy( 'U', n, n, a, lda, v, ldv ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_zdscal( p, one/temp1, v(1,p), 1 ) + end do + if ( lsvec )then + call stdlib_zpocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_zpocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) + + end if + else if ( lsvec ) then + ! U Is Available As Workspace + call stdlib_zlacpy( 'U', n, n, a, lda, u, ldu ) + do p = 1, n + temp1 = sva(iwork(p)) + call stdlib_zdscal( p, one/temp1, u(1,p), 1 ) + end do + call stdlib_zpocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) + + else + call stdlib_zlacpy( 'U', n, n, a, lda, cwork, n ) + ! [] call stdlib_zlacpy( 'u', n, n, a, lda, cwork(n+1), n ) + ! change: here index shifted by n to the left, cwork(1:n) + ! not needed for sigma only computation + do p = 1, n + temp1 = sva(iwork(p)) + ! [] call stdlib_zdscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) + call stdlib_zdscal( p, one/temp1, cwork((p-1)*n+1), 1 ) + end do + ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths + ! [] call stdlib_zpocon( 'u', n, cwork(n+1), n, one, temp1, + ! [] $ cwork(n+n*n+1), rwork, ierr ) + call stdlib_zpocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) + + end if + if ( temp1 /= zero ) then + sconda = one / sqrt(temp1) + else + sconda = - one + end if + ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). + ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda + else + sconda = - one + end if + end if + l2pert = l2pert .and. ( abs( a(1,1)/a(nr,nr) ) > sqrt(big1) ) + ! if there is no violent scaling, artificial perturbation is not needed. + ! phase 3: + if ( .not. ( rsvec .or. lsvec ) ) then + ! singular values only + ! .. transpose a(1:nr,1:n) + do p = 1, min( n-1, nr ) + call stdlib_zcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_zlacgv( n-p+1, a(p,p), 1 ) + end do + if ( nr == n ) a(n,n) = conjg(a(n,n)) + ! the following two do-loops introduce small relative perturbation + ! into the strict upper triangle of the lower triangular matrix. + ! small entries below the main diagonal are also changed. + ! this modification is useful if the computing environment does not + ! provide/allow flush to zero underflow, for it prevents many + ! annoying denormalized numbers in case of strongly scaled matrices. + ! the perturbation is structured so that it does not introduce any + ! new perturbation of the singular values, and it does not destroy + ! the job done by the preconditioner. + ! the licence for this perturbation is in the variable l2pert, which + ! should be .false. if flush to zero underflow is active. + if ( .not. almort ) then + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=dp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=dp) + do p = 1, n + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_zlaset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda ) + end if + ! Second Preconditioning Using The Qr Factorization + call stdlib_zgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) + ! And Transpose Upper To Lower Triangular + do p = 1, nr - 1 + call stdlib_zcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 ) + call stdlib_zlacgv( nr-p+1, a(p,p), 1 ) + end do + end if + ! row-cyclic jacobi svd algorithm with column pivoting + ! .. again some perturbation (a "background noise") is added + ! to drown denormals + if ( l2pert ) then + ! xsc = sqrt(small) + xsc = epsln / real(n,KIND=dp) + do q = 1, nr + ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=dp) + do p = 1, nr + if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & + ctemp + ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) + end do + end do + else + call stdlib_zlaset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda ) + end if + ! .. and one-sided jacobi rotations are started on a lower + ! triangular matrix (plus perturbation which is ignored in + ! the part which destroys triangular form (confusing?!)) + call stdlib_zgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & + .not. lsvec ) .and. ( nr /= n ) ) ) then + ! -> singular values and right singular vectors <- + if ( almort ) then + ! In This Case Nr Equals N + do p = 1, nr + call stdlib_zcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_zlacgv( n-p+1, v(p,p), 1 ) + end do + call stdlib_zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_zgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & + rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + else + ! .. two more qr factorizations ( one qrf is not enough, two require + ! accumulated product of jacobi rotations, three are perfect ) + call stdlib_zlaset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda ) + call stdlib_zgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) + call stdlib_zlacpy( 'L', nr, nr, a, lda, v, ldv ) + call stdlib_zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv ) + call stdlib_zgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr + call stdlib_zcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 ) + call stdlib_zlacgv( nr-p+1, v(p,p), 1 ) + end do + call stdlib_zlaset('U', nr-1, nr-1, czero, czero, v(1,2), ldv) + call stdlib_zgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & + lwork-n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_zlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv ) + call stdlib_zlaset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv ) + call stdlib_zlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) + end if + call stdlib_zunmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & + ierr ) + end if + ! Permute The Rows Of V + ! do 8991 p = 1, n + ! call stdlib_zcopy( n, v(p,1), ldv, a(iwork(p),1), lda ) + 8991 continue + ! call stdlib_zlacpy( 'all', n, n, a, lda, v, ldv ) + call stdlib_zlapmr( .false., n, n, v, ldv, iwork ) + if ( transp ) then + call stdlib_zlacpy( 'A', n, n, v, ldv, u, ldu ) + end if + else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then + call stdlib_zlaset( 'L', n-1,n-1, czero, czero, a(2,1), lda ) + call stdlib_zgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & + lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + call stdlib_zlapmr( .false., n, n, v, ldv, iwork ) + else if ( lsvec .and. ( .not. rsvec ) ) then + ! Singular Values And Left Singular Vectors + ! Second Preconditioning Step To Avoid Need To Accumulate + ! jacobi rotations in the jacobi iterations. + do p = 1, nr + call stdlib_zcopy( n-p+1, a(p,p), lda, u(p,p), 1 ) + call stdlib_zlacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_zgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + do p = 1, nr - 1 + call stdlib_zcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 ) + call stdlib_zlacgv( n-p+1, u(p,p), 1 ) + end do + call stdlib_zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + call stdlib_zgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& + n, rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < m ) then + call stdlib_zlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_zlaset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu ) + call stdlib_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) + end if + end if + call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + do p = 1, n1 + xsc = one / stdlib_dznrm2( m, u(1,p), 1 ) + call stdlib_zdscal( m, xsc, u(1,p), 1 ) + end do + if ( transp ) then + call stdlib_zlacpy( 'A', n, n, u, ldu, v, ldv ) + end if + else + ! Full Svd + if ( .not. jracc ) then + if ( .not. almort ) then + ! second preconditioning step (qrf [with pivoting]) + ! note that the composition of transpose, qrf and transpose is + ! equivalent to an lqf call. since in many libraries the qrf + ! seems to be better optimized than the lqf, we do explicit + ! transpose and use the qrf. this is subject to changes in an + ! optimized implementation of stdlib_zgejsv. + do p = 1, nr + call stdlib_zcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_zlacgv( n-p+1, v(p,p), 1 ) + end do + ! The Following Two Loops Perturb Small Entries To Avoid + ! denormals in the second qr factorization, where they are + ! as good as zeros. this is done to avoid painfully slow + ! computation with denormals. the relative size of the perturbation + ! is a parameter that can be changed by the implementer. + ! this perturbation device will be obsolete on machines with + ! properly implemented arithmetic. + ! to switch it off, set l2pert=.false. to remove it from the + ! code, remove the action under l2pert=.true., leave the else part. + ! the following two loops should be blocked and fused with the + ! transposed copy above. + if ( l2pert ) then + xsc = sqrt(small) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=dp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + ! estimate the row scaled condition number of r1 + ! (if r1 is rectangular, n > nr, then the condition number + ! of the leading nr x nr submatrix is estimated.) + call stdlib_zlacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr ) + do p = 1, nr + temp1 = stdlib_dznrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1) + call stdlib_zdscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1) + end do + call stdlib_zpocon('L',nr,cwork(2*n+1),nr,one,temp1,cwork(2*n+nr*nr+1),rwork,& + ierr) + condr1 = one / sqrt(temp1) + ! Here Need A Second Opinion On The Condition Number + ! Then Assume Worst Case Scenario + ! r1 is ok for inverse <=> condr1 < real(n,KIND=dp) + ! more conservative <=> condr1 < sqrt(real(n,KIND=dp)) + cond_ok = sqrt(sqrt(real(nr,KIND=dp))) + ! [tp] cond_ok is a tuning parameter. + if ( condr1 < cond_ok ) then + ! .. the second qrf without pivoting. note: in an optimized + ! implementation, this qrf should be implemented as the qrf + ! of a lower triangular matrix. + ! r1^* = q2 * r2 + call stdlib_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + if ( l2pert ) then + xsc = sqrt(small)/epsln + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + if ( nr /= n )call stdlib_zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + + ! .. save ... + ! This Transposed Copy Should Be Better Than Naive + do p = 1, nr - 1 + call stdlib_zcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 ) + call stdlib_zlacgv(nr-p+1, v(p,p), 1 ) + end do + v(nr,nr)=conjg(v(nr,nr)) + condr2 = condr1 + else + ! .. ill-conditioned case: second qrf with pivoting + ! note that windowed pivoting would be equally good + ! numerically, and more run-time efficient. so, in + ! an optimal implementation, the next call to stdlib_zgeqp3 + ! should be replaced with eg. call zgeqpx (acm toms #782) + ! with properly (carefully) chosen parameters. + ! r1^* * p2 = q2 * r2 + do p = 1, nr + iwork(n+p) = 0 + end do + call stdlib_zgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2*n+1), lwork-& + 2*n, rwork, ierr ) + ! * call stdlib_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), + ! * $ lwork-2*n, ierr ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) + if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp + ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) + end do + end do + end if + call stdlib_zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, nr + do q = 1, p - 1 + ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) + ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_zlaset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv ) + end if + ! now, compute r2 = l3 * q3, the lq factorization. + call stdlib_zgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),cwork(2*n+n*nr+nr+1), & + lwork-2*n-n*nr-nr, ierr ) + ! And Estimate The Condition Number + call stdlib_zlacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr ) + do p = 1, nr + temp1 = stdlib_dznrm2( p, cwork(2*n+n*nr+nr+p), nr ) + call stdlib_zdscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr ) + end do + call stdlib_zpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,cwork(2*n+n*nr+& + nr+nr*nr+1),rwork,ierr ) + condr2 = one / sqrt(temp1) + if ( condr2 >= cond_ok ) then + ! Save The Householder Vectors Used For Q3 + ! (this overwrites the copy of r2, as it will not be + ! needed in this branch, but it does not overwritte the + ! huseholder vectors of q2.). + call stdlib_zlacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n ) + ! And The Rest Of The Information On Q3 Is In + ! work(2*n+n*nr+1:2*n+n*nr+n) + end if + end if + if ( l2pert ) then + xsc = sqrt(small) + do q = 2, nr + ctemp = xsc * v(q,q) + do p = 1, q - 1 + ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) + v(p,q) = - ctemp + end do + end do + else + call stdlib_zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv ) + end if + ! second preconditioning finished; continue with jacobi svd + ! the input matrix is lower trinagular. + ! recover the right singular vectors as solution of a well + ! conditioned triangular matrix equation. + if ( condr1 < cond_ok ) then + call stdlib_zgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2*n+n*nr+nr+1)& + ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_zcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_zdscal( nr, sva(p), v(1,p), 1 ) + end do + ! Pick The Right Matrix Equation And Solve It + if ( nr == n ) then + ! :)) .. best case, r1 is inverted. the solution of this matrix + ! equation is q2*v2 = the product of the jacobi rotations + ! used in stdlib_zgesvj, premultiplied with the orthogonal matrix + ! from the second qr factorization. + call stdlib_ztrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) + else + ! .. r1 is well conditioned, but non-square. adjoint of r2 + ! is inverted to get the product of the jacobi rotations + ! used in stdlib_zgesvj. the q-factor from the second qr + ! factorization is then built in explicitly. + call stdlib_ztrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),n,v,ldv) + if ( nr < n ) then + call stdlib_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv) + call stdlib_zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv) + call stdlib_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_zunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(& + 2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) + end if + else if ( condr2 < cond_ok ) then + ! the matrix r2 is inverted. the solution of the matrix equation + ! is q3^* * v3 = the product of the jacobi rotations (appplied to + ! the lower triangular l3 from the lq factorization of + ! r2=l3*q3), pre-multiplied with the transposed q3. + call stdlib_zgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, nr + call stdlib_zcopy( nr, v(1,p), 1, u(1,p), 1 ) + call stdlib_zdscal( nr, sva(p), u(1,p), 1 ) + end do + call stdlib_ztrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,u,ldu) + ! Apply The Permutation From The Second Qr Factorization + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + if ( nr < n ) then + call stdlib_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + else + ! last line of defense. + ! #:( this is a rather pathological case: no scaled condition + ! improvement after two pivoted qr factorizations. other + ! possibility is that the rank revealing qr factorization + ! or the condition estimator has failed, or the cond_ok + ! is set very close to one (which is unnecessary). normally, + ! this branch should never be executed, but in rare cases of + ! failure of the rrqr or condition estimator, the last line of + ! defense ensures that stdlib_zgejsv completes the task. + ! compute the full svd of l3 using stdlib_zgesvj with explicit + ! accumulation of jacobi rotations. + call stdlib_zgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2*n+& + n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) + end if + call stdlib_zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+& + n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) + call stdlib_zunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,cwork(2*n+n*nr+1), & + u, ldu, cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) + do q = 1, nr + do p = 1, nr + cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q) + end do + do p = 1, nr + u(p,q) = cwork(2*n+n*nr+nr+p) + end do + end do + end if + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=dp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_dznrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_zdscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_zlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu) + call stdlib_zlaset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) + end if + end if + ! the q matrix from the first qrf is built into the left singular + ! matrix u. this applies to all cases. + call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + ! the columns of u are normalized. the cost is o(m*n) flops. + temp1 = sqrt(real(m,KIND=dp)) * epsln + do p = 1, nr + xsc = one / stdlib_dznrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_zdscal( m, xsc,& + u(1,p), 1 ) + end do + ! if the initial qrf is computed with row pivoting, the left + ! singular vectors must be adjusted. + if ( rowpiv )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + else + ! The Initial Matrix A Has Almost Orthogonal Columns And + ! the second qrf is not needed + call stdlib_zlacpy( 'U', n, n, a, lda, cwork(n+1), n ) + if ( l2pert ) then + xsc = sqrt(small) + do p = 2, n + ctemp = xsc * cwork( n + (p-1)*n + p ) + do q = 1, p - 1 + ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / + ! $ abs(cwork(n+(p-1)*n+q)) ) + cwork(n+(q-1)*n+p)=-ctemp + end do + end do + else + call stdlib_zlaset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) + end if + call stdlib_zgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& + n*n+1), lwork-n-n*n, rwork, lrwork,info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + do p = 1, n + call stdlib_zcopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 ) + call stdlib_zdscal( n, sva(p), cwork(n+(p-1)*n+1), 1 ) + end do + call stdlib_ztrsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) + do p = 1, n + call stdlib_zcopy( n, cwork(n+p), n, v(iwork(p),1), ldv ) + end do + temp1 = sqrt(real(n,KIND=dp))*epsln + do p = 1, n + xsc = one / stdlib_dznrm2( n, v(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_zdscal( n, xsc,& + v(1,p), 1 ) + end do + ! assemble the left singular vector matrix u (m x n). + if ( n < m ) then + call stdlib_zlaset( 'A', m-n, n, czero, czero, u(n+1,1), ldu ) + if ( n < n1 ) then + call stdlib_zlaset('A',n, n1-n, czero, czero, u(1,n+1),ldu) + call stdlib_zlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) + end if + end if + call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& + n, ierr ) + temp1 = sqrt(real(m,KIND=dp))*epsln + do p = 1, n1 + xsc = one / stdlib_dznrm2( m, u(1,p), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_zdscal( m, xsc,& + u(1,p), 1 ) + end do + if ( rowpiv )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + ! end of the >> almost orthogonal case << in the full svd + else + ! this branch deploys a preconditioned jacobi svd with explicitly + ! accumulated rotations. it is included as optional, mainly for + ! experimental purposes. it does perform well, and can also be used. + ! in this implementation, this branch will be automatically activated + ! if the condition number sigma_max(a) / sigma_min(a) is predicted + ! to be greater than the overflow threshold. this is because the + ! a posteriori computation of the singular vectors assumes robust + ! implementation of blas and some lapack procedures, capable of working + ! in presence of extreme values, e.g. when the singular values spread from + ! the underflow to the overflow threshold. + do p = 1, nr + call stdlib_zcopy( n-p+1, a(p,p), lda, v(p,p), 1 ) + call stdlib_zlacgv( n-p+1, v(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 1, nr + ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=dp) + do p = 1, n + if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & + ctemp + ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) + if ( p < q ) v(p,q) = - v(p,q) + end do + end do + else + call stdlib_zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv ) + end if + call stdlib_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),lwork-2*n, ierr ) + + call stdlib_zlacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n ) + do p = 1, nr + call stdlib_zcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 ) + call stdlib_zlacgv( nr-p+1, u(p,p), 1 ) + end do + if ( l2pert ) then + xsc = sqrt(small/epsln) + do q = 2, nr + do p = 1, q - 1 + ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=dp) + ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) + u(p,q) = - ctemp + end do + end do + else + call stdlib_zlaset('U', nr-1, nr-1, czero, czero, u(1,2), ldu ) + end if + call stdlib_zgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2*n+n*nr+1),& + lwork-2*n-n*nr,rwork, lrwork, info ) + scalem = rwork(1) + numrank = nint(rwork(2),KIND=ilp) + if ( nr < n ) then + call stdlib_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv ) + call stdlib_zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv ) + call stdlib_zlaset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) + end if + call stdlib_zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),v,ldv,cwork(2*n+n*nr+& + nr+1),lwork-2*n-n*nr-nr,ierr ) + ! permute the rows of v using the (column) permutation from the + ! first qrf. also, scale the columns to make them unit in + ! euclidean norm. this applies to all cases. + temp1 = sqrt(real(n,KIND=dp)) * epsln + do q = 1, n + do p = 1, n + cwork(2*n+n*nr+nr+iwork(p)) = v(p,q) + end do + do p = 1, n + v(p,q) = cwork(2*n+n*nr+nr+p) + end do + xsc = one / stdlib_dznrm2( n, v(1,q), 1 ) + if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib_zdscal( n, xsc,& + v(1,q), 1 ) + end do + ! at this moment, v contains the right singular vectors of a. + ! next, assemble the left singular vector matrix u (m x n). + if ( nr < m ) then + call stdlib_zlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu ) + if ( nr < n1 ) then + call stdlib_zlaset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu) + call stdlib_zlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) + end if + end if + call stdlib_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & + ierr ) + if ( rowpiv )call stdlib_zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 ) + end if + if ( transp ) then + ! .. swap u and v because the procedure worked on a^* + do p = 1, n + call stdlib_zswap( n, u(1,p), 1, v(1,p), 1 ) + end do + end if + end if + ! end of the full svd + ! undo scaling, if necessary (and possible) + if ( uscal2 <= (big/sva(1))*uscal1 ) then + call stdlib_dlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr ) + uscal1 = one + uscal2 = one + end if + if ( nr < n ) then + do p = nr+1, n + sva(p) = zero + end do + end if + rwork(1) = uscal2 * scalem + rwork(2) = uscal1 + if ( errest ) rwork(3) = sconda + if ( lsvec .and. rsvec ) then + rwork(4) = condr1 + rwork(5) = condr2 + end if + if ( l2tran ) then + rwork(6) = entra + rwork(7) = entrat + end if + iwork(1) = nr + iwork(2) = numrank + iwork(3) = warning + if ( transp ) then + iwork(4) = 1 + else + iwork(4) = -1 + end if + return + end subroutine stdlib_zgejsv + + !> ZGESVJ: computes the singular value decomposition (SVD) of a complex + !> M-by-N matrix A, where M >= N. The SVD of A is written as + !> [++] [xx] [x0] [xx] + !> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] + !> [++] [xx] + !> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal + !> matrix, and V is an N-by-N unitary matrix. The diagonal elements + !> of SIGMA are the singular values of A. The columns of U and V are the + !> left and the right singular vectors of A, respectively. + + pure subroutine stdlib_zgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & + rwork, lrwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n + character, intent(in) :: joba, jobu, jobv + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) + real(dp), intent(inout) :: rwork(lrwork) + real(dp), intent(out) :: sva(n) + ! ===================================================================== + ! Local Parameters + integer(ilp), parameter :: nsweep = 30 + + + + ! Local Scalars + complex(dp) :: aapq, ompq + real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & + mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & + theta, thsign, tol + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & + upper + ! Intrinsic Functions + intrinsic :: abs,max,min,conjg,real,sign,sqrt + ! from lapack + ! from lapack + ! Executable Statements + ! test the input arguments + lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) + uctol = stdlib_lsame( jobu, 'C' ) + rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) + applv = stdlib_lsame( jobv, 'A' ) + upper = stdlib_lsame( joba, 'U' ) + lower = stdlib_lsame( joba, 'L' ) + lquery = ( lwork == -1 ) .or. ( lrwork == -1 ) + if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then + info = -1 + else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then + info = -2 + else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -3 + else if( m<0 ) then + info = -4 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -5 + else if( ldaj}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps + ! where eps is the round-off and ctol is defined as follows: + if( uctol ) then + ! ... user controlled + ctol = rwork( 1 ) + else + ! ... default + if( lsvec .or. rsvec .or. applv ) then + ctol = sqrt( real( m,KIND=dp) ) + else + ctol = real( m,KIND=dp) + end if + end if + ! ... and the machine dependent parameters are + ! [!] (make sure that stdlib_slamch() works properly on the target machine.) + epsln = stdlib_dlamch( 'EPSILON' ) + rooteps = sqrt( epsln ) + sfmin = stdlib_dlamch( 'SAFEMINIMUM' ) + rootsfmin = sqrt( sfmin ) + small = sfmin / epsln + big = stdlib_dlamch( 'OVERFLOW' ) + ! big = one / sfmin + rootbig = one / rootsfmin + ! large = big / sqrt( real( m*n,KIND=dp) ) + bigtheta = one / rooteps + tol = ctol*epsln + roottol = sqrt( tol ) + if( real( m,KIND=dp)*epsln>=one ) then + info = -4 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + ! initialize the right singular vector matrix. + if( rsvec ) then + mvl = n + call stdlib_zlaset( 'A', mvl, n, czero, cone, v, ldv ) + else if( applv ) then + mvl = mv + end if + rsvec = rsvec .or. applv + ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) + ! (!) if necessary, scale a to protect the largest singular value + ! from overflow. it is possible that saving the largest singular + ! value destroys the information about the small ones. + ! this initial scaling is almost minimal in the sense that the + ! goal is to make sure that no column norm overflows, and that + ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries + ! in a are detected, the procedure returns with info=-6. + skl = one / sqrt( real( m,KIND=dp)*real( n,KIND=dp) ) + noscale = .true. + goscale = .true. + if( lower ) then + ! the input matrix is m-by-n lower triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_zlassq( m-p+1, a( p, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else if( upper ) then + ! the input matrix is m-by-n upper triangular (trapezoidal) + do p = 1, n + aapp = zero + aaqq = one + call stdlib_zlassq( p, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + else + ! the input matrix is m-by-n general dense + do p = 1, n + aapp = zero + aaqq = one + call stdlib_zlassq( m, a( 1, p ), 1, aapp, aaqq ) + if( aapp>big ) then + info = -6 + call stdlib_xerbla( 'ZGESVJ', -info ) + return + end if + aaqq = sqrt( aaqq ) + if( ( aapp<( big / aaqq ) ) .and. noscale ) then + sva( p ) = aapp*aaqq + else + noscale = .false. + sva( p ) = aapp*( aaqq*skl ) + if( goscale ) then + goscale = .false. + do q = 1, p - 1 + sva( q ) = sva( q )*skl + end do + end if + end if + end do + end if + if( noscale )skl = one + ! move the smaller part of the spectrum from the underflow threshold + ! (!) start by determining the position of the nonzero entries of the + ! array sva() relative to ( sfmin, big ). + aapp = zero + aaqq = big + do p = 1, n + if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) + aapp = max( aapp, sva( p ) ) + end do + ! #:) quick return for zero matrix + if( aapp==zero ) then + if( lsvec )call stdlib_zlaset( 'G', m, n, czero, cone, a, lda ) + rwork( 1 ) = one + rwork( 2 ) = zero + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! #:) quick return for one-column matrix + if( n==1 ) then + if( lsvec )call stdlib_zlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,a( 1, 1 ), lda, ierr ) + + rwork( 1 ) = one / skl + if( sva( 1 )>=sfmin ) then + rwork( 2 ) = one + else + rwork( 2 ) = zero + end if + rwork( 3 ) = zero + rwork( 4 ) = zero + rwork( 5 ) = zero + rwork( 6 ) = zero + return + end if + ! protect small singular values from underflow, and try to + ! avoid underflows/overflows in computing jacobi rotations. + sn = sqrt( sfmin / epsln ) + temp1 = sqrt( big / real( n,KIND=dp) ) + if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & + then + temp1 = min( big, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then + temp1 = min( sn / aaqq, big / (aapp*sqrt( real(n,KIND=dp)) ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = max( sn / aaqq, temp1 / aapp ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then + temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=dp) )*aapp ) ) + ! aaqq = aaqq*temp1 + ! aapp = aapp*temp1 + else + temp1 = one + end if + ! scale, if necessary + if( temp1/=one ) then + call stdlib_dlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr ) + end if + skl = temp1*skl + if( skl/=one ) then + call stdlib_zlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr ) + skl = one / skl + end if + ! row-cyclic jacobi svd algorithm with column pivoting + emptsw = ( n*( n-1 ) ) / 2 + notrot = 0 + do q = 1, n + cwork( q ) = cone + end do + swband = 3 + ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective + ! if stdlib_zgesvj is used as a computational routine in the preconditioned + ! jacobi svd algorithm stdlib_zgejsv. for sweeps i=1:swband the procedure + ! works on pivots inside a band-like region around the diagonal. + ! the boundaries are determined dynamically, based on the number of + ! pivots above a threshold. + kbl = min( 8, n ) + ! [tp] kbl is a tuning parameter that defines the tile size in the + ! tiling of the p-q loops of pivot pairs. in general, an optimal + ! value of kbl depends on the matrix dimensions and on the + ! parameters of the computer's memory. + nbl = n / kbl + if( ( nbl*kbl )/=n )nbl = nbl + 1 + blskip = kbl**2 + ! [tp] blkskip is a tuning parameter that depends on swband and kbl. + rowskip = min( 5, kbl ) + ! [tp] rowskip is a tuning parameter. + lkahead = 1 + ! [tp] lkahead is a tuning parameter. + ! quasi block transformations, using the lower (upper) triangular + ! structure of the input matrix. the quasi-block-cycling usually + ! invokes cubic convergence. big part of this cycle is done inside + ! canonical subspaces of dimensions less than m. + if( ( lower .or. upper ) .and. ( n>max( 64, 4*kbl ) ) ) then + ! [tp] the number of partition levels and the actual partition are + ! tuning parameters. + n4 = n / 4 + n2 = n / 2 + n34 = 3*n4 + if( applv ) then + q = 0 + else + q = 1 + end if + if( lower ) then + ! this works very well on lower triangular matrices, in particular + ! in the framework of the preconditioned jacobi svd (xgejsv). + ! the idea is simple: + ! [+ 0 0 0] note that jacobi transformations of [0 0] + ! [+ + 0 0] [0 0] + ! [+ + x 0] actually work on [x 0] [x 0] + ! [+ + x x] [x x]. [x x] + call stdlib_zgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & + sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2, cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_zgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & + n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_zgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & + sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), & + lwork-n, ierr ) + call stdlib_zgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & + n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, & + ierr ) + call stdlib_zgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 1, cwork( n+1 ), lwork-n,ierr ) + call stdlib_zgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + else if( upper ) then + call stdlib_zgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & + tol, 2, cwork( n+1 ), lwork-n,ierr ) + call stdlib_zgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & + mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,ierr ) + + call stdlib_zgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & + sfmin, tol, 1, cwork( n+1 ),lwork-n, ierr ) + call stdlib_zgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& + , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,cwork( n+1 ), lwork-n, ierr ) + + end if + end if + ! .. row-cyclic pivot strategy with de rijk's pivoting .. + loop_1993: do i = 1, nsweep + ! .. go go go ... + mxaapq = zero + mxsinj = zero + iswrot = 0 + notrot = 0 + pskipped = 0 + ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs + ! 1 <= p < q <= n. this is the first step toward a blocked implementation + ! of the rotations. new implementation, based on block transformations, + ! is under development. + loop_2000: do ibr = 1, nbl + igl = ( ibr-1 )*kbl + 1 + loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) + igl = igl + ir1*kbl + loop_2001: do p = igl, min( igl+kbl-1, n-1 ) + ! .. de rijk's pivoting + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + call stdlib_zswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_zswap( mvl, v( 1, p ), 1,v( 1, q ), 1 ) + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = cwork(p) + cwork(p) = cwork(q) + cwork(q) = aapq + end if + if( ir1==0 ) then + ! column norms are periodically updated by explicit + ! norm computation. + ! [!] caveat: + ! unfortunately, some blas implementations compute stdlib_dznrm2(m,a(1,p),1) + ! as sqrt(s=stdlib_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to + ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_dznrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_scnrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_dznrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_zcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1), & + lda, ierr ) + aapq = stdlib_zdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_zcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_zdotc( m, a(1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_zcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one, m,1, cwork(n+1), & + lda,ierr ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_zaxpy( m, -aapq, cwork(n+1), 1,a( 1, q ), 1 ) + + call stdlib_zlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dznrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_zlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_zcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_zdotc( m, cwork(n+1), 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_zcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq,one, m, 1,cwork(n+1), & + lda, ierr ) + aapq = stdlib_zdotc( m, a( 1, p ), 1,cwork(n+1), 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + cwork(p) = -cwork(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_zcopy( m, a( 1, p ), 1,cwork(n+1), 1 ) + + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_zaxpy( m, -aapq, cwork(n+1),1, a( 1, q ), 1 ) + + call stdlib_zlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_zcopy( m, a( 1, q ), 1,cwork(n+1), 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, cwork(n+1)& + ,lda,ierr ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_zaxpy( m, -conjg(aapq),cwork(n+1), 1, a( 1, & + p ), 1 ) + call stdlib_zlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_zlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_dznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the singular values and find how many are above + ! the underflow threshold. + n2 = 0 + n4 = 0 + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + call stdlib_zswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_zswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + if( sva( p )/=zero ) then + n4 = n4 + 1 + if( sva( p )*skl>sfmin )n2 = n2 + 1 + end if + end do + if( sva( n )/=zero ) then + n4 = n4 + 1 + if( sva( n )*skl>sfmin )n2 = n2 + 1 + end if + ! normalize the left singular vectors. + if( lsvec .or. uctol ) then + do p = 1, n4 + ! call stdlib_zdscal( m, one / sva( p ), a( 1, p ), 1 ) + call stdlib_zlascl( 'G',0,0, sva(p), one, m, 1, a(1,p), m, ierr ) + end do + end if + ! scale the product of jacobi rotations. + if( rsvec ) then + do p = 1, n + temp1 = one / stdlib_dznrm2( mvl, v( 1, p ), 1 ) + call stdlib_zdscal( mvl, temp1, v( 1, p ), 1 ) + end do + end if + ! undo scaling, if necessary (and possible). + if( ( ( skl>one ) .and. ( sva( 1 )<( big / skl ) ) ).or. ( ( skl( sfmin / skl ) ) ) ) then + do p = 1, n + sva( p ) = skl*sva( p ) + end do + skl = one + end if + rwork( 1 ) = skl + ! the singular values of a are skl*sva(1:n). if skl/=one + ! then some of the singular values may overflow or underflow and + ! the spectrum is given in this factored representation. + rwork( 2 ) = real( n4,KIND=dp) + ! n4 is the number of computed nonzero singular values of a. + rwork( 3 ) = real( n2,KIND=dp) + ! n2 is the number of singular values of a greater than sfmin. + ! if n2 ZGGES3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, the generalized complex Schur + !> form (S, T), and optionally left and/or right Schur vectors (VSL + !> and VSR). This gives the generalized Schur factorization + !> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) + !> where (VSR)**H is the conjugate-transpose of VSR. + !> Optionally, it also orders the eigenvalues so that a selected cluster + !> of eigenvalues appears in the leading diagonal blocks of the upper + !> triangular matrix S and the upper triangular matrix T. The leading + !> columns of VSL and VSR then form an unitary basis for the + !> corresponding left and right eigenspaces (deflating subspaces). + !> (If only the generalized eigenvalues are needed, use the driver + !> ZGGEV instead, which is faster.) + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w + !> or a ratio alpha/beta = w, such that A - w*B is singular. It is + !> usually represented as the pair (alpha,beta), as there is a + !> reasonable interpretation for beta=0, and even for both being zero. + !> A pair of matrices (S,T) is in generalized complex Schur form if S + !> and T are upper triangular and, in addition, the diagonal elements + !> of T are non-negative real numbers. + + subroutine stdlib_zgges3( jobvsl, jobvsr, sort, selctg, n, a, lda, b,ldb, sdim, alpha, beta, & + vsl, ldvsl, vsr, ldvsr,work, lwork, rwork, bwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvsl, jobvsr, sort + integer(ilp), intent(out) :: info, sdim + integer(ilp), intent(in) :: lda, ldb, ldvsl, ldvsr, lwork, n + ! Array Arguments + logical(lk), intent(out) :: bwork(*) + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vsl(ldvsl,*), vsr(ldvsr,*), work(*) + + ! Function Arguments + procedure(stdlib_selctg_z) :: selctg + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: cursl, ilascl, ilbscl, ilvsl, ilvsr, lastsl, lquery, wantst + integer(ilp) :: i, icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, iright, irows, irwrk, & + itau, iwrk, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, pvsl, pvsr, smlnum + ! Local Arrays + integer(ilp) :: idum(1) + real(dp) :: dif(2) + ! Intrinsic Functions + intrinsic :: max,sqrt + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvsl, 'N' ) ) then + ijobvl = 1 + ilvsl = .false. + else if( stdlib_lsame( jobvsl, 'V' ) ) then + ijobvl = 2 + ilvsl = .true. + else + ijobvl = -1 + ilvsl = .false. + end if + if( stdlib_lsame( jobvsr, 'N' ) ) then + ijobvr = 1 + ilvsr = .false. + else if( stdlib_lsame( jobvsr, 'V' ) ) then + ijobvr = 2 + ilvsr = .true. + else + ijobvr = -1 + ilvsr = .false. + end if + wantst = stdlib_lsame( sort, 'S' ) + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( ( .not.wantst ) .and. ( .not.stdlib_lsame( sort, 'N' ) ) ) then + info = -3 + else if( n<0 ) then + info = -5 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrix to make it more nearly triangular + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_zggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + icols = n + 1 - ilo + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vsl + if( ilvsl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vsl, ldvsl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vsl( ilo+1, ilo )& + , ldvsl ) + end if + call stdlib_zungqr( irows, irows, irows, vsl( ilo, ilo ), ldvsl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vsr + if( ilvsr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vsr, ldvsr ) + ! reduce to generalized hessenberg form + call stdlib_zgghd3( jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb, vsl,ldvsl, vsr, ldvsr,& + work( iwrk ), lwork+1-iwrk, ierr ) + sdim = 0 + ! perform qz algorithm, computing schur vectors if desired + iwrk = itau + call stdlib_zlaqz0( 'S', jobvsl, jobvsr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vsl, & + ldvsl, vsr, ldvsr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 30 + end if + ! sort eigenvalues alpha/beta if desired + if( wantst ) then + ! undo scaling on eigenvalues before selecting + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, 1, alpha, n, ierr ) + + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, 1, beta, n, ierr ) + + ! select eigenvalues + do i = 1, n + bwork( i ) = selctg( alpha( i ), beta( i ) ) + end do + call stdlib_ztgsen( 0, ilvsl, ilvsr, bwork, n, a, lda, b, ldb, alpha,beta, vsl, & + ldvsl, vsr, ldvsr, sdim, pvsl, pvsr,dif, work( iwrk ), lwork-iwrk+1, idum, 1, ierr ) + + if( ierr==1 )info = n + 3 + end if + ! apply back-permutation to vsl and vsr + if( ilvsl )call stdlib_zggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsl, ldvsl, ierr ) + if( ilvsr )call stdlib_zggbak( 'P', 'R', n, ilo, ihi, rwork( ileft ),rwork( iright ), & + n, vsr, ldvsr, ierr ) + ! undo scaling + if( ilascl ) then + call stdlib_zlascl( 'U', 0, 0, anrmto, anrm, n, n, a, lda, ierr ) + call stdlib_zlascl( 'G', 0, 0, anrmto, anrm, n, 1, alpha, n, ierr ) + end if + if( ilbscl ) then + call stdlib_zlascl( 'U', 0, 0, bnrmto, bnrm, n, n, b, ldb, ierr ) + call stdlib_zlascl( 'G', 0, 0, bnrmto, bnrm, n, 1, beta, n, ierr ) + end if + if( wantst ) then + ! check if reordering is correct + lastsl = .true. + sdim = 0 + do i = 1, n + cursl = selctg( alpha( i ), beta( i ) ) + if( cursl )sdim = sdim + 1 + if( cursl .and. .not.lastsl )info = n + 2 + lastsl = cursl + end do + end if + 30 continue + work( 1 ) = cmplx( lwkopt,KIND=dp) + return + end subroutine stdlib_zgges3 + + !> ZGGEV3: computes for a pair of N-by-N complex nonsymmetric matrices + !> (A,B), the generalized eigenvalues, and optionally, the left and/or + !> right generalized eigenvectors. + !> A generalized eigenvalue for a pair of matrices (A,B) is a scalar + !> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is + !> singular. It is usually represented as the pair (alpha,beta), as + !> there is a reasonable interpretation for beta=0, and even for both + !> being zero. + !> The right generalized eigenvector v(j) corresponding to the + !> generalized eigenvalue lambda(j) of (A,B) satisfies + !> A * v(j) = lambda(j) * B * v(j). + !> The left generalized eigenvector u(j) corresponding to the + !> generalized eigenvalues lambda(j) of (A,B) satisfies + !> u(j)**H * A = lambda(j) * u(j)**H * B + !> where u(j)**H is the conjugate-transpose of u(j). + + subroutine stdlib_zggev3( jobvl, jobvr, n, a, lda, b, ldb, alpha, beta,vl, ldvl, vr, ldvr, & + work, lwork, rwork, info ) + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: jobvl, jobvr + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, ldvl, ldvr, lwork, n + ! Array Arguments + real(dp), intent(out) :: rwork(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: alpha(*), beta(*), vl(ldvl,*), vr(ldvr,*), work(*) + ! ===================================================================== + + + ! Local Scalars + logical(lk) :: ilascl, ilbscl, ilv, ilvl, ilvr, lquery + character :: chtemp + integer(ilp) :: icols, ierr, ihi, ijobvl, ijobvr, ileft, ilo, in, iright, irows, irwrk,& + itau, iwrk, jc, jr, lwkopt + real(dp) :: anrm, anrmto, bignum, bnrm, bnrmto, eps, smlnum, temp + complex(dp) :: x + ! Local Arrays + logical(lk) :: ldumma(1) + ! Intrinsic Functions + intrinsic :: abs,real,aimag,max,sqrt + ! Statement Functions + real(dp) :: abs1 + ! Statement Function Definitions + abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) ) + ! Executable Statements + ! decode the input arguments + if( stdlib_lsame( jobvl, 'N' ) ) then + ijobvl = 1 + ilvl = .false. + else if( stdlib_lsame( jobvl, 'V' ) ) then + ijobvl = 2 + ilvl = .true. + else + ijobvl = -1 + ilvl = .false. + end if + if( stdlib_lsame( jobvr, 'N' ) ) then + ijobvr = 1 + ilvr = .false. + else if( stdlib_lsame( jobvr, 'V' ) ) then + ijobvr = 2 + ilvr = .true. + else + ijobvr = -1 + ilvr = .false. + end if + ilv = ilvl .or. ilvr + ! test the input arguments + info = 0 + lquery = ( lwork==-1 ) + if( ijobvl<=0 ) then + info = -1 + else if( ijobvr<=0 ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ldazero .and. anrmbignum ) then + anrmto = bignum + ilascl = .true. + end if + if( ilascl )call stdlib_zlascl( 'G', 0, 0, anrm, anrmto, n, n, a, lda, ierr ) + ! scale b if max element outside range [smlnum,bignum] + bnrm = stdlib_zlange( 'M', n, n, b, ldb, rwork ) + ilbscl = .false. + if( bnrm>zero .and. bnrmbignum ) then + bnrmto = bignum + ilbscl = .true. + end if + if( ilbscl )call stdlib_zlascl( 'G', 0, 0, bnrm, bnrmto, n, n, b, ldb, ierr ) + ! permute the matrices a, b to isolate eigenvalues if possible + ileft = 1 + iright = n + 1 + irwrk = iright + n + call stdlib_zggbal( 'P', n, a, lda, b, ldb, ilo, ihi, rwork( ileft ),rwork( iright ), & + rwork( irwrk ), ierr ) + ! reduce b to triangular form (qr decomposition of b) + irows = ihi + 1 - ilo + if( ilv ) then + icols = n + 1 - ilo + else + icols = irows + end if + itau = 1 + iwrk = itau + irows + call stdlib_zgeqrf( irows, icols, b( ilo, ilo ), ldb, work( itau ),work( iwrk ), lwork+& + 1-iwrk, ierr ) + ! apply the orthogonal transformation to matrix a + call stdlib_zunmqr( 'L', 'C', irows, icols, irows, b( ilo, ilo ), ldb,work( itau ), a( & + ilo, ilo ), lda, work( iwrk ),lwork+1-iwrk, ierr ) + ! initialize vl + if( ilvl ) then + call stdlib_zlaset( 'FULL', n, n, czero, cone, vl, ldvl ) + if( irows>1 ) then + call stdlib_zlacpy( 'L', irows-1, irows-1, b( ilo+1, ilo ), ldb,vl( ilo+1, ilo ),& + ldvl ) + end if + call stdlib_zungqr( irows, irows, irows, vl( ilo, ilo ), ldvl,work( itau ), work( & + iwrk ), lwork+1-iwrk, ierr ) + end if + ! initialize vr + if( ilvr )call stdlib_zlaset( 'FULL', n, n, czero, cone, vr, ldvr ) + ! reduce to generalized hessenberg form + if( ilv ) then + ! eigenvectors requested -- work on whole matrix. + call stdlib_zgghd3( jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb, vl,ldvl, vr, ldvr, & + work( iwrk ), lwork+1-iwrk, ierr ) + else + call stdlib_zgghd3( 'N', 'N', irows, 1, irows, a( ilo, ilo ), lda,b( ilo, ilo ), & + ldb, vl, ldvl, vr, ldvr,work( iwrk ), lwork+1-iwrk, ierr ) + end if + ! perform qz algorithm (compute eigenvalues, and optionally, the + ! schur form and schur vectors) + iwrk = itau + if( ilv ) then + chtemp = 'S' + else + chtemp = 'E' + end if + call stdlib_zlaqz0( chtemp, jobvl, jobvr, n, ilo, ihi, a, lda, b, ldb,alpha, beta, vl, & + ldvl, vr, ldvr, work( iwrk ),lwork+1-iwrk, rwork( irwrk ), 0, ierr ) + if( ierr/=0 ) then + if( ierr>0 .and. ierr<=n ) then + info = ierr + else if( ierr>n .and. ierr<=2*n ) then + info = ierr - n + else + info = n + 1 + end if + go to 70 + end if + ! compute eigenvectors + if( ilv ) then + if( ilvl ) then + if( ilvr ) then + chtemp = 'B' + else + chtemp = 'L' + end if + else + chtemp = 'R' + end if + call stdlib_ztgevc( chtemp, 'B', ldumma, n, a, lda, b, ldb, vl, ldvl,vr, ldvr, n, & + in, work( iwrk ), rwork( irwrk ),ierr ) + if( ierr/=0 ) then + info = n + 2 + go to 70 + end if + ! undo balancing on vl and vr and normalization + if( ilvl ) then + call stdlib_zggbak( 'P', 'L', n, ilo, ihi, rwork( ileft ),rwork( iright ), n, vl,& + ldvl, ierr ) + loop_30: do jc = 1, n + temp = zero + do jr = 1, n + temp = max( temp, abs1( vl( jr, jc ) ) ) + end do + if( temp ZGSVJ0: is called from ZGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but + !> it does not check convergence (stopping criterion). Few tuning + !> parameters (marked by [TP]) are available for the implementer. + + pure subroutine stdlib_zgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep + real(dp), intent(in) :: eps, sfmin, tol + character, intent(in) :: jobv + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(dp), intent(out) :: work(lwork) + real(dp), intent(inout) :: sva(n) + ! ===================================================================== + + + ! Local Scalars + complex(dp) :: aapq, ompq + real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & + lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,max,conjg,real,min,sign,sqrt + ! from lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( lda sqrt(overflow_threshold), and to + ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). + ! hence, stdlib_dznrm2 cannot be trusted, not even in the case when + ! the true norm is far from the under(over)flow boundaries. + ! if properly implemented stdlib_dznrm2 is available, the if-then-else-end if + ! below should be replaced with "aapp = stdlib_dznrm2( m, a(1,p), 1 )". + if( ( sva( p )rootsfmin ) ) then + sva( p ) = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + temp1 = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, temp1, aapp ) + sva( p ) = temp1*sqrt( aapp ) + end if + aapp = sva( p ) + else + aapp = sva( p ) + end if + if( aapp>zero ) then + pskipped = 0 + loop_2002: do q = p + 1, min( igl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + if( aaqq>=one ) then + rotok = ( small*aapp )<=aaqq + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, work, lda, & + ierr ) + aapq = stdlib_zdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + rotok = aapp<=( aaqq / small ) + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aapp ) / aaqq + else + call stdlib_zcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_zdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg( cwork(p) ) * cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + ! Rotate + ! [rtd] rotated = rotated + one + if( ir1==0 ) then + notrot = 0 + pskipped = 0 + iswrot = iswrot + 1 + end if + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/aapq1 + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if ( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if ( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one, m,1, work, lda,& + ierr ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one, m,1, a( 1, q ), & + lda, ierr ) + call stdlib_zaxpy( m, -aapq, work, 1,a( 1, q ), 1 ) + call stdlib_zlascl( 'G', 0, 0, one, aaqq, m,1, a( 1, q ), & + lda, ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + mxsinj = max( mxsinj, sfmin ) + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! recompute sva(q), sva(p). + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dznrm2( m, a( 1, q ), 1 ) + else + t = zero + aaqq = one + call stdlib_zlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + else + ! a(:,p) and a(:,q) already numerically orthogonal + if( ir1==0 )notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + end if + else + ! a(:,q) is zero column + if( ir1==0 )notrot = notrot + 1 + pskipped = pskipped + 1 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + if( ir1==0 )aapp = -aapp + notrot = 0 + go to 2103 + end if + end do loop_2002 + ! end q-loop + 2103 continue + ! bailed out of q-loop + sva( p ) = aapp + else + sva( p ) = aapp + if( ( ir1==0 ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & + n ) - p + end if + end do loop_2001 + ! end of the p-loop + ! end of doing the block ( ibr, ibr ) + end do loop_1002 + ! end of ir1-loop + ! ... go to the off diagonal blocks + igl = ( ibr-1 )*kbl + 1 + loop_2010: do jbc = ibr + 1, nbl + jgl = ( jbc-1 )*kbl + 1 + ! doing the block at ( ibr, jbc ) + ijblsk = 0 + loop_2100: do p = igl, min( igl+kbl-1, n ) + aapp = sva( p ) + if( aapp>zero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_zdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_zcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_zdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_zaxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_zlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_zcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_zaxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_zlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_zlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_dznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_zswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_zswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_zgsvj0 + + !> ZGSVJ1: is called from ZGESVJ as a pre-processor and that is its main + !> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but + !> it targets only particular pivots and it does not check convergence + !> (stopping criterion). Few tuning parameters (marked by [TP]) are + !> available for the implementer. + !> Further Details + !> ~~~~~~~~~~~~~~~ + !> ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of + !> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) + !> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The + !> block-entries (tiles) of the (1,2) off-diagonal block are marked by the + !> [x]'s in the following scheme: + !> | * * * [x] [x] [x]| + !> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. + !> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> |[x] [x] [x] * * * | + !> In terms of the columns of A, the first N1 columns are rotated 'against' + !> the remaining N-N1 columns, trying to increase the angle between the + !> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is + !> tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. + !> The number of sweeps is given in NSWEEP and the orthogonality threshold + !> is given in TOL. + + pure subroutine stdlib_zgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & + nsweep, work, lwork, info ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + real(dp), intent(in) :: eps, sfmin, tol + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep + character, intent(in) :: jobv + ! Array Arguments + complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*) + complex(dp), intent(out) :: work(lwork) + real(dp), intent(inout) :: sva(n) + ! ===================================================================== + + ! Local Scalars + complex(dp) :: aapq, ompq + real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & + rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign + integer(ilp) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & + notrot, nblc, nblr, p, pskipped, q, rowskip, swband + logical(lk) :: applv, rotok, rsvec + ! Intrinsic Functions + intrinsic :: abs,conjg,max,real,min,sign,sqrt + ! From Lapack + ! Executable Statements + ! test the input parameters. + applv = stdlib_lsame( jobv, 'A' ) + rsvec = stdlib_lsame( jobv, 'V' ) + if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then + info = -1 + else if( m<0 ) then + info = -2 + else if( ( n<0 ) .or. ( n>m ) ) then + info = -3 + else if( n1<0 ) then + info = -4 + else if( ldazero ) then + pskipped = 0 + loop_2200: do q = jgl, min( jgl+kbl-1, n ) + aaqq = sva( q ) + if( aaqq>zero ) then + aapp0 = aapp + ! M X 2 Jacobi Svd + ! safe gram matrix computation + if( aaqq>=one ) then + if( aapp>=aaqq ) then + rotok = ( small*aapp )<=aaqq + else + rotok = ( small*aaqq )<=aapp + end if + if( aapp<( big / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / & + aaqq ) / aapp + else + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_zdotc( m, work, 1,a( 1, q ), 1 ) / & + aaqq + end if + else + if( aapp>=aaqq ) then + rotok = aapp<=( aaqq / small ) + else + rotok = aaqq<=( aapp / small ) + end if + if( aapp>( small / aaqq ) ) then + aapq = ( stdlib_zdotc( m, a( 1, p ), 1,a( 1, q ), 1 ) / max(& + aaqq,aapp) )/ min(aaqq,aapp) + else + call stdlib_zcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq,one, m, 1,work, lda, & + ierr ) + aapq = stdlib_zdotc( m, a( 1, p ), 1,work, 1 ) / & + aapp + end if + end if + ! aapq = aapq * conjg(cwork(p))*cwork(q) + aapq1 = -abs(aapq) + mxaapq = max( mxaapq, -aapq1 ) + ! to rotate or not to rotate, that is the question ... + if( abs( aapq1 )>tol ) then + ompq = aapq / abs(aapq) + notrot = 0 + ! [rtd] rotated = rotated + 1 + pskipped = 0 + iswrot = iswrot + 1 + if( rotok ) then + aqoap = aaqq / aapp + apoaq = aapp / aaqq + theta = -half*abs( aqoap-apoaq )/ aapq1 + if( aaqq>aapp0 )theta = -theta + if( abs( theta )>bigtheta ) then + t = half / theta + cs = one + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *t ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*t ) + end if + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + mxsinj = max( mxsinj, abs( t ) ) + else + ! Choose Correct Signum For Theta And Rotate + thsign = -sign( one, aapq1 ) + if( aaqq>aapp0 )thsign = -thsign + t = one / ( theta+thsign*sqrt( one+theta*theta ) ) + + cs = sqrt( one / ( one+t*t ) ) + sn = t*cs + mxsinj = max( mxsinj, abs( sn ) ) + sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) + + aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) + call stdlib_zrot( m, a(1,p), 1, a(1,q), 1,cs, conjg(ompq)& + *sn ) + if( rsvec ) then + call stdlib_zrot( mvl, v(1,p), 1,v(1,q), 1, cs, & + conjg(ompq)*sn ) + end if + end if + d(p) = -d(q) * ompq + else + ! .. have to use modified gram-schmidt like transformation + if( aapp>aaqq ) then + call stdlib_zcopy( m, a( 1, p ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, work,lda,& + ierr ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, a( 1, q ),& + lda,ierr ) + call stdlib_zaxpy( m, -aapq, work,1, a( 1, q ), 1 ) + + call stdlib_zlascl( 'G', 0, 0, one, aaqq,m, 1, a( 1, q ),& + lda,ierr ) + sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + else + call stdlib_zcopy( m, a( 1, q ), 1,work, 1 ) + call stdlib_zlascl( 'G', 0, 0, aaqq, one,m, 1, work,lda,& + ierr ) + call stdlib_zlascl( 'G', 0, 0, aapp, one,m, 1, a( 1, p ),& + lda,ierr ) + call stdlib_zaxpy( m, -conjg(aapq),work, 1, a( 1, p ), 1 & + ) + call stdlib_zlascl( 'G', 0, 0, one, aapp,m, 1, a( 1, p ),& + lda,ierr ) + sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) + + mxsinj = max( mxsinj, sfmin ) + end if + end if + ! end if rotok then ... else + ! in the case of cancellation in updating sva(q), sva(p) + ! .. recompute sva(q), sva(p) + if( ( sva( q ) / aaqq )**2<=rooteps )then + if( ( aaqqrootsfmin ) ) then + sva( q ) = stdlib_dznrm2( m, a( 1, q ), 1) + else + t = zero + aaqq = one + call stdlib_zlassq( m, a( 1, q ), 1, t,aaqq ) + sva( q ) = t*sqrt( aaqq ) + end if + end if + if( ( aapp / aapp0 )**2<=rooteps ) then + if( ( aapprootsfmin ) ) then + aapp = stdlib_dznrm2( m, a( 1, p ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, p ), 1, t,aapp ) + aapp = t*sqrt( aapp ) + end if + sva( p ) = aapp + end if + ! end of ok rotation + else + notrot = notrot + 1 + ! [rtd] skipped = skipped + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + else + notrot = notrot + 1 + pskipped = pskipped + 1 + ijblsk = ijblsk + 1 + end if + if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then + sva( p ) = aapp + notrot = 0 + go to 2011 + end if + if( ( i<=swband ) .and.( pskipped>rowskip ) ) then + aapp = -aapp + notrot = 0 + go to 2203 + end if + end do loop_2200 + ! end of the q-loop + 2203 continue + sva( p ) = aapp + else + if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1 + if( aapprootsfmin ) )then + sva( n ) = stdlib_dznrm2( m, a( 1, n ), 1 ) + else + t = zero + aapp = one + call stdlib_zlassq( m, a( 1, n ), 1, t, aapp ) + sva( n ) = t*sqrt( aapp ) + end if + ! additional steering devices + if( ( iswband+1 ) .and. ( mxaapq=emptsw )go to 1994 + end do loop_1993 + ! end i=1:nsweep loop + ! #:( reaching this point means that the procedure has not converged. + info = nsweep - 1 + go to 1995 + 1994 continue + ! #:) reaching this point means numerical convergence after the i-th + ! sweep. + info = 0 + ! #:) info = 0 confirms successful iterations. + 1995 continue + ! sort the vector sva() of column norms. + do p = 1, n - 1 + q = stdlib_idamax( n-p+1, sva( p ), 1 ) + p - 1 + if( p/=q ) then + temp1 = sva( p ) + sva( p ) = sva( q ) + sva( q ) = temp1 + aapq = d( p ) + d( p ) = d( q ) + d( q ) = aapq + call stdlib_zswap( m, a( 1, p ), 1, a( 1, q ), 1 ) + if( rsvec )call stdlib_zswap( mvl, v( 1, p ), 1, v( 1, q ), 1 ) + end if + end do + return + end subroutine stdlib_zgsvj1 + + !> ZHESV_AA: computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**H * T * U, if UPLO = 'U', or + !> A = L * T * L**H, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is Hermitian and tridiagonal. The factored form + !> of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_zhesv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_hetrf, lwkopt_hetrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZHETRF_AA: computes the factorization of a complex hermitian matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**H*T*U or A = L*T*L**H + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a hermitian tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zhetrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_zlahef_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_zswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j, j+1 ) ) + a( j, j+1 ) = cone + call stdlib_zcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_zgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',1, mj, jb+1,-cone,& + a( j1-k2, j3 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j3, j3 ), lda ) + + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_zgemm + call stdlib_zgemm( 'CONJUGATE TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-& + cone, a( j1-k2, j2 ), lda,work( (j3-j1+1)+k1*n ), n,cone, a( j2, j3 ), lda & + ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**h using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_zlahef; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_zlahef_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_zswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = conjg( a( j+1, j ) ) + a( j+1, j ) = cone + call stdlib_zcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=0 and k2=1 for the first panel, + ! and k1=1 and k2=0 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_zgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',mj, 1, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), & + lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block column with stdlib_zgemm + call stdlib_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE',n-j3+1, nj, jb+1,-& + cone, work( (j3-j1+1)+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda & + ) + end do + ! recover t( j+1, j ) + a( j+1, j ) = conjg( alpha ) + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zhetrf_aa + + !> ZHSEQR: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + + pure subroutine stdlib_zhseqr( job, compz, n, ilo, ihi, h, ldh, w, z, ldz,work, lwork, info ) + + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ilo, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + character, intent(in) :: compz, job + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: w(*), work(*) + ! ===================================================================== + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: nl = 49 + real(dp), parameter :: rzero = 0.0_dp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_zlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== nl allocates some local workspace to help small matrices + ! . through a rare stdlib_zlahqr failure. nl > ntiny = 15 is + ! . required and nl <= nmin = stdlib_ilaenv(ispec=12,...) is recom- + ! . mended. (the default value of nmin is 75.) using nl = 49 + ! . allows up to six simultaneous shifts and a 16-by-16 + ! . deflation window. ==== + + + + ! Local Arrays + complex(dp) :: hl(nl,nl), workl(nl) + ! Local Scalars + integer(ilp) :: kbot, nmin + logical(lk) :: initz, lquery, wantt, wantz + ! Intrinsic Functions + intrinsic :: real,cmplx,max,min + ! Executable Statements + ! ==== decode and check the input parameters. ==== + wantt = stdlib_lsame( job, 'S' ) + initz = stdlib_lsame( compz, 'I' ) + wantz = initz .or. stdlib_lsame( compz, 'V' ) + work( 1 ) = cmplx( real( max( 1, n ),KIND=dp), rzero,KIND=dp) + lquery = lwork==-1 + info = 0 + if( .not.stdlib_lsame( job, 'E' ) .and. .not.wantt ) then + info = -1 + else if( .not.stdlib_lsame( compz, 'N' ) .and. .not.wantz ) then + info = -2 + else if( n<0 ) then + info = -3 + else if( ilo<1 .or. ilo>max( 1, n ) ) then + info = -4 + else if( ihin ) then + info = -5 + else if( ldh1 )call stdlib_zcopy( ilo-1, h, ldh+1, w, 1 ) + if( ihinmin ) then + call stdlib_zlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, work, & + lwork, info ) + else + ! ==== small matrix ==== + call stdlib_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, ilo, ihi,z, ldz, info ) + + if( info>0 ) then + ! ==== a rare stdlib_zlahqr failure! stdlib_zlaqr0 sometimes succeeds + ! . when stdlib_zlahqr fails. ==== + kbot = info + if( n>=nl ) then + ! ==== larger matrices have enough subdiagonal scratch + ! . space to call stdlib_zlaqr0 directly. ==== + call stdlib_zlaqr0( wantt, wantz, n, ilo, kbot, h, ldh, w,ilo, ihi, z, ldz,& + work, lwork, info ) + else + ! ==== tiny matrices don't have enough subdiagonal + ! . scratch space to benefit from stdlib_zlaqr0. hence, + ! . tiny matrices must be copied into a larger + ! . array before calling stdlib_zlaqr0. ==== + call stdlib_zlacpy( 'A', n, n, h, ldh, hl, nl ) + hl( n+1, n ) = czero + call stdlib_zlaset( 'A', nl, nl-n, czero, czero, hl( 1, n+1 ),nl ) + call stdlib_zlaqr0( wantt, wantz, nl, ilo, kbot, hl, nl, w,ilo, ihi, z, & + ldz, workl, nl, info ) + if( wantt .or. info/=0 )call stdlib_zlacpy( 'A', n, n, hl, nl, h, ldh ) + + end if + end if + end if + ! ==== clear out the trash, if necessary. ==== + if( ( wantt .or. info/=0 ) .and. n>2 )call stdlib_zlaset( 'L', n-2, n-2, czero, & + czero, h( 3, 1 ), ldh ) + ! ==== ensure reported workspace size is backward-compatible with + ! . previous lapack versions. ==== + work( 1 ) = cmplx( max( real( max( 1, n ),KIND=dp),real( work( 1 ),KIND=dp) ), & + rzero,KIND=dp) + end if + end subroutine stdlib_zhseqr + + !> DLAHEF_AA factorizes a panel of a complex hermitian matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_zlahef_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), h(ldh,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(dp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: real,conjg,max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_zhetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j, j:n) - h(j:n, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:n, j) has been initialized to be a(j, j:n) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_zlacgv( j-k1, a( 1, j ), 1 ) + call stdlib_zgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + call stdlib_zlacgv( j-k1, a( 1, j ), 1 ) + end if + ! copy h(i:n, i) into work + call stdlib_zcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:n) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:n) stores u(j-1, j:n) + alpha = -conjg( a( k-1, j ) ) + call stdlib_zaxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = real( work( 1 ),KIND=dp) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_zaxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_izamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:n) with a(i1+1:n, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_zswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + call stdlib_zlacgv( i2-i1, a( j1+i1-1, i1+1 ), lda ) + call stdlib_zlacgv( i2-i1-1, a( j1+i1, i2 ), 1 ) + ! swap a(i1, i2+1:n) with a(i2, i2+1:n) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_zswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_zhetrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:n, j) := a(j:n, j) - h(j:n, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:n, j) has been initialized to be a(j:n, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_zlacgv( j-k1, a( j, 1 ), lda ) + call stdlib_zgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + call stdlib_zlacgv( j-k1, a( j, 1 ), lda ) + end if + ! copy h(j:n, j) into work + call stdlib_zcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:n, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -conjg( a( j, k-1 ) ) + call stdlib_zaxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = real( work( 1 ),KIND=dp) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_zaxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:n)|) + i2 = stdlib_izamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply hermitian pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:n, i1) with a(i2, i1+1:n) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_zswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + call stdlib_zlacgv( i2-i1, a( i1+1, j1+i1-1 ), 1 ) + call stdlib_zlacgv( i2-i1-1, a( i2, j1+i1 ), lda ) + ! swap a(i2+1:n, i1) with a(i2+1:n, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_zswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j ZLAQR0: computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_zlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(dp), parameter :: wilk1 = 0.75_dp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_zlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(dp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(dp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,max,min,mod,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_zlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_zlaqr3 ==== + call stdlib_zlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_zlaqr5, stdlib_zlaqr3) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + return + end if + ! ==== stdlib_zlahqr/stdlib_zlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_zlaqr3 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_zlaqr3 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_zlaqr4 or + ! . stdlib_zlahqr on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + if( ns>nmin ) then + call stdlib_zlaqr4( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, work, lwork, inf ) + else + call stdlib_zlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( & + ks ), 1, 1,zdum, 1, inf ) + end if + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + end subroutine stdlib_zlaqr0 + + !> Aggressive early deflation: + !> ZLAQR3: accepts as input an upper Hessenberg matrix + !> H and performs an unitary similarity transformation + !> designed to detect and deflate fully converged eigenvalues from + !> a trailing principal submatrix. On output H has been over- + !> written by a new Hessenberg matrix that is a perturbation of + !> an unitary similarity transformation of H. It is to be + !> hoped that the final version of H has many zero subdiagonal + !> entries. + + pure subroutine stdlib_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& + nh, nv, nw + integer(ilp), intent(out) :: nd, ns + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) + ! ================================================================ + ! Parameters + real(dp), parameter :: rzero = 0.0_dp + real(dp), parameter :: rone = 1.0_dp + + + ! Local Scalars + complex(dp) :: beta, cdum, s, tau + real(dp) :: foo, safmax, safmin, smlnum, ulp + integer(ilp) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & + lwk1, lwk2, lwk3, lwkopt, nmin + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,conjg,aimag,int,max,min + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + ! ==== estimate optimal workspace. ==== + jw = min( nw, kbot-ktop+1 ) + if( jw<=2 ) then + lwkopt = 1 + else + ! ==== workspace query call to stdlib_zgehrd ==== + call stdlib_zgehrd( jw, 1, jw-1, t, ldt, work, work, -1, info ) + lwk1 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_zunmhr ==== + call stdlib_zunmhr( 'R', 'N', jw, jw, 1, jw-1, t, ldt, work, v, ldv,work, -1, info ) + + lwk2 = int( work( 1 ),KIND=ilp) + ! ==== workspace query call to stdlib_zlaqr4 ==== + call stdlib_zlaqr4( .true., .true., jw, 1, jw, t, ldt, sh, 1, jw, v,ldv, work, -1, & + infqr ) + lwk3 = int( work( 1 ),KIND=ilp) + ! ==== optimal workspace ==== + lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) + end if + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + return + end if + ! ==== nothing to do ... + ! ... for an empty active block ... ==== + ns = 0 + nd = 0 + work( 1 ) = cone + if( ktop>kbot )return + ! ... nor for an empty deflation window. ==== + if( nw<1 )return + ! ==== machine constants ==== + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = rone / safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp) / ulp ) + ! ==== setup deflation window ==== + jw = min( nw, kbot-ktop+1 ) + kwtop = kbot - jw + 1 + if( kwtop==ktop ) then + s = czero + else + s = h( kwtop, kwtop-1 ) + end if + if( kbot==kwtop ) then + ! ==== 1-by-1 deflation window: not much to do ==== + sh( kwtop ) = h( kwtop, kwtop ) + ns = 1 + nd = 0 + if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero + end if + work( 1 ) = cone + return + end if + ! ==== convert to spike-triangular form. (in case of a + ! . rare qr failure, this routine continues to do + ! . aggressive early deflation using that part of + ! . the deflation window that converged using infqr + ! . here and there to keep track.) ==== + call stdlib_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) + call stdlib_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2, 1 ), ldt+1 ) + call stdlib_zlaset( 'A', jw, jw, czero, cone, v, ldv ) + nmin = stdlib_ilaenv( 12, 'ZLAQR3', 'SV', jw, 1, jw, lwork ) + if( jw>nmin ) then + call stdlib_zlaqr4( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + work, lwork, infqr ) + else + call stdlib_zlahqr( .true., .true., jw, 1, jw, t, ldt, sh( kwtop ), 1,jw, v, ldv, & + infqr ) + end if + ! ==== deflation detection loop ==== + ns = jw + ilst = infqr + 1 + do knt = infqr + 1, jw + ! ==== small spike tip deflation test ==== + foo = cabs1( t( ns, ns ) ) + if( foo==rzero )foo = cabs1( s ) + if( cabs1( s )*cabs1( v( 1, ns ) )<=max( smlnum, ulp*foo ) )then + ! ==== cone more converged eigenvalue ==== + ns = ns - 1 + else + ! ==== cone undeflatable eigenvalue. move it up out of the + ! . way. (stdlib_ztrexc can not fail in this case.) ==== + ifst = ns + call stdlib_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + ilst = ilst + 1 + end if + end do + ! ==== return to hessenberg form ==== + if( ns==0 )s = czero + if( nscabs1( t( ifst, ifst ) ) )ifst = j + end do + ilst = i + if( ifst/=ilst )call stdlib_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) + + end do + end if + ! ==== restore shift/eigenvalue array from t ==== + do i = infqr + 1, jw + sh( kwtop+i-1 ) = t( i, i ) + end do + if( ns1 .and. s/=czero ) then + ! ==== reflect spike back into lower triangle ==== + call stdlib_zcopy( ns, v, ldv, work, 1 ) + do i = 1, ns + work( i ) = conjg( work( i ) ) + end do + beta = work( 1 ) + call stdlib_zlarfg( ns, beta, work( 2 ), 1, tau ) + work( 1 ) = cone + call stdlib_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3, 1 ), ldt ) + call stdlib_zlarf( 'L', ns, jw, work, 1, conjg( tau ), t, ldt,work( jw+1 ) ) + + call stdlib_zlarf( 'R', ns, ns, work, 1, tau, t, ldt,work( jw+1 ) ) + call stdlib_zlarf( 'R', jw, ns, work, 1, tau, v, ldv,work( jw+1 ) ) + call stdlib_zgehrd( jw, 1, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) + + end if + ! ==== copy updated reduced window into place ==== + if( kwtop>1 )h( kwtop, kwtop-1 ) = s*conjg( v( 1, 1 ) ) + call stdlib_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) + call stdlib_zcopy( jw-1, t( 2, 1 ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) + ! ==== accumulate orthogonal matrix in order update + ! . h and z, if requested. ==== + if( ns>1 .and. s/=czero )call stdlib_zunmhr( 'R', 'N', jw, ns, 1, ns, t, ldt, work, & + v, ldv,work( jw+1 ), lwork-jw, info ) + ! ==== update vertical slab in h ==== + if( wantt ) then + ltop = 1 + else + ltop = ktop + end if + do krow = ltop, kwtop - 1, nv + kln = min( nv, kwtop-krow ) + call stdlib_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & + czero, wv, ldwv ) + call stdlib_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) + end do + ! ==== update horizontal slab in h ==== + if( wantt ) then + do kcol = kbot + 1, n, nh + kln = min( nh, n-kcol+1 ) + call stdlib_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & + czero, t, ldt ) + call stdlib_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) + end do + end if + ! ==== update vertical slab in z ==== + if( wantz ) then + do krow = iloz, ihiz, nv + kln = min( nv, ihiz-krow+1 ) + call stdlib_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & + czero, wv, ldwv ) + call stdlib_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) + end do + end if + end if + ! ==== return the number of deflations ... ==== + nd = jw - ns + ! ==== ... and the number of shifts. (subtracting + ! . infqr from the spike length takes care + ! . of the case of a rare qr failure while + ! . calculating eigenvalues of the deflation + ! . window.) ==== + ns = ns - infqr + ! ==== return optimal workspace. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + end subroutine stdlib_zlaqr3 + + !> ZLAQR4: implements one level of recursion for ZLAQR0. + !> It is a complete implementation of the small bulge multi-shift + !> QR algorithm. It may be called by ZLAQR0 and, for large enough + !> deflation window size, it may be called by ZLAQR3. This + !> subroutine is identical to ZLAQR0 except that it calls ZLAQR2 + !> instead of ZLAQR3. + !> ZLAQR4 computes the eigenvalues of a Hessenberg matrix H + !> and, optionally, the matrices T and Z from the Schur decomposition + !> H = Z T Z**H, where T is an upper triangular matrix (the + !> Schur form), and Z is the unitary matrix of Schur vectors. + !> Optionally Z may be postmultiplied into an input unitary + !> matrix Q so that this routine can give the Schur factorization + !> of a matrix A which has been reduced to the Hessenberg form H + !> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + pure subroutine stdlib_zlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& + lwork, info ) + ! -- lapack auxiliary routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + integer(ilp), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n + integer(ilp), intent(out) :: info + logical(lk), intent(in) :: wantt, wantz + ! Array Arguments + complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) + complex(dp), intent(out) :: w(*), work(*) + ! ================================================================ + ! Parameters + integer(ilp), parameter :: ntiny = 15 + integer(ilp), parameter :: kexnw = 5 + integer(ilp), parameter :: kexsh = 6 + real(dp), parameter :: wilk1 = 0.75_dp + ! ==== matrices of order ntiny or smaller must be processed by + ! . stdlib_zlahqr because of insufficient subdiagonal scratch space. + ! . (this is a hard limit.) ==== + + ! ==== exceptional deflation windows: try to cure rare + ! . slow convergence by varying the size of the + ! . deflation window after kexnw iterations. ==== + + ! ==== exceptional shifts: try to cure rare slow convergence + ! . with ad-hoc exceptional shifts every kexsh iterations. + ! . ==== + + ! ==== the constant wilk1 is used to form the exceptional + ! . shifts. ==== + + + + ! Local Scalars + complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 + real(dp) :: s + integer(ilp) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & + kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& + nwmax, nwr, nwupbd + logical(lk) :: sorted + character :: jbcmpz*2 + ! Local Arrays + complex(dp) :: zdum(1,1) + ! Intrinsic Functions + intrinsic :: abs,real,cmplx,aimag,int,max,min,mod,sqrt + ! Statement Functions + real(dp) :: cabs1 + ! Statement Function Definitions + cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) + ! Executable Statements + info = 0 + ! ==== quick return for n = 0: nothing to do. ==== + if( n==0 ) then + work( 1 ) = cone + return + end if + if( n<=ntiny ) then + ! ==== tiny matrices must use stdlib_zlahqr. ==== + lwkopt = 1 + if( lwork/=-1 )call stdlib_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & + z, ldz, info ) + else + ! ==== use small bulge multi-shift qr with aggressive early + ! . deflation on larger-than-tiny matrices. ==== + ! ==== hope for the best. ==== + info = 0 + ! ==== set up job flags for stdlib_ilaenv. ==== + if( wantt ) then + jbcmpz( 1: 1 ) = 'S' + else + jbcmpz( 1: 1 ) = 'E' + end if + if( wantz ) then + jbcmpz( 2: 2 ) = 'V' + else + jbcmpz( 2: 2 ) = 'N' + end if + ! ==== nwr = recommended deflation window size. at this + ! . point, n > ntiny = 15, so there is enough + ! . subdiagonal workspace for nwr>=2 as required. + ! . (in fact, there is enough subdiagonal space for + ! . nwr>=4.) ==== + nwr = stdlib_ilaenv( 13, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nwr = max( 2, nwr ) + nwr = min( ihi-ilo+1, ( n-1 ) / 3, nwr ) + ! ==== nsr = recommended number of simultaneous shifts. + ! . at this point n > ntiny = 15, so there is at + ! . enough subdiagonal workspace for nsr to be even + ! . and greater than or equal to two as required. ==== + nsr = stdlib_ilaenv( 15, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nsr = min( nsr, ( n-3 ) / 6, ihi-ilo ) + nsr = max( 2, nsr-mod( nsr, 2 ) ) + ! ==== estimate optimal workspace ==== + ! ==== workspace query call to stdlib_zlaqr2 ==== + call stdlib_zlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& + ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1 ) + ! ==== optimal workspace = max(stdlib_zlaqr5, stdlib_zlaqr2) ==== + lwkopt = max( 3*nsr / 2, int( work( 1 ),KIND=ilp) ) + ! ==== quick return in case of workspace query. ==== + if( lwork==-1 ) then + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + return + end if + ! ==== stdlib_zlahqr/stdlib_zlaqr0 crossover point ==== + nmin = stdlib_ilaenv( 12, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nmin = max( ntiny, nmin ) + ! ==== nibble crossover point ==== + nibble = stdlib_ilaenv( 14, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + nibble = max( 0, nibble ) + ! ==== accumulate reflections during ttswp? use block + ! . 2-by-2 structure during matrix-matrix multiply? ==== + kacc22 = stdlib_ilaenv( 16, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) + kacc22 = max( 0, kacc22 ) + kacc22 = min( 2, kacc22 ) + ! ==== nwmax = the largest possible deflation window for + ! . which there is sufficient workspace. ==== + nwmax = min( ( n-1 ) / 3, lwork / 2 ) + nw = nwmax + ! ==== nsmax = the largest number of simultaneous shifts + ! . for which there is sufficient workspace. ==== + nsmax = min( ( n-3 ) / 6, 2*lwork / 3 ) + nsmax = nsmax - mod( nsmax, 2 ) + ! ==== ndfl: an iteration count restarted at deflation. ==== + ndfl = 1 + ! ==== itmax = iteration limit ==== + itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) ) + ! ==== last row and column in the active block ==== + kbot = ihi + ! ==== main loop ==== + loop_70: do it = 1, itmax + ! ==== done when kbot falls below ilo ==== + if( kbot=nh-1 ) then + nw = nh + else + kwtop = kbot - nw + 1 + if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & + 1 + end if + end if + if( ndfl=0 .or. nw>=nwupbd ) then + ndec = ndec + 1 + if( nw-ndec<2 )ndec = 0 + nw = nw - ndec + end if + ! ==== aggressive early deflation: + ! . split workspace under the subdiagonal into + ! . - an nw-by-nw work array v in the lower + ! . left-hand-corner, + ! . - an nw-by-at-least-nw-but-more-is-better + ! . (nw-by-nho) horizontal work array along + ! . the bottom edge, + ! . - an at-least-nw-but-more-is-better (nhv-by-nw) + ! . vertical work array along the left-hand-edge. + ! . ==== + kv = n - nw + 1 + kt = nw + 1 + nho = ( n-nw-1 ) - kt + 1 + kwv = nw + 2 + nve = ( n-nw ) - kwv + 1 + ! ==== aggressive early deflation ==== + call stdlib_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & + ls, ld, w, h( kv, 1 ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1 ), ldh, work,& + lwork ) + ! ==== adjust kbot accounting for new deflations. ==== + kbot = kbot - ld + ! ==== ks points to the shifts. ==== + ks = kbot - ls + 1 + ! ==== skip an expensive qr sweep if there is a (partly + ! . heuristic) reason to expect that many eigenvalues + ! . will deflate without it. here, the qr sweep is + ! . skipped if many eigenvalues have just been deflated + ! . or if the remaining active block is small. + if( ( ld==0 ) .or. ( ( 100*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& + ) ) ) then + ! ==== ns = nominal number of simultaneous shifts. + ! . this may be lowered (slightly) if stdlib_zlaqr2 + ! . did not provide that many shifts. ==== + ns = min( nsmax, nsr, max( 2, kbot-ktop ) ) + ns = ns - mod( ns, 2 ) + ! ==== if there have been no deflations + ! . in a multiple of kexsh iterations, + ! . then try exceptional shifts. + ! . otherwise use shifts provided by + ! . stdlib_zlaqr2 above or from the eigenvalues + ! . of a trailing principal submatrix. ==== + if( mod( ndfl, kexsh )==0 ) then + ks = kbot - ns + 1 + do i = kbot, ks + 1, -2 + w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) + w( i-1 ) = w( i ) + end do + else + ! ==== got ns/2 or fewer shifts? use stdlib_zlahqr + ! . on a trailing principal submatrix to + ! . get more. (since ns<=nsmax<=(n-3)/6, + ! . there is enough space below the subdiagonal + ! . to fit an ns-by-ns scratch array.) ==== + if( kbot-ks+1<=ns / 2 ) then + ks = kbot - ns + 1 + kt = n - ns + 1 + call stdlib_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1 ), ldh ) + + call stdlib_zlahqr( .false., .false., ns, 1, ns,h( kt, 1 ), ldh, w( ks )& + , 1, 1, zdum,1, inf ) + ks = ks + inf + ! ==== in case of a rare qr failure use + ! . eigenvalues of the trailing 2-by-2 + ! . principal submatrix. scale to avoid + ! . overflows, underflows and subnormals. + ! . (the scale factor s can not be czero, + ! . because h(kbot,kbot-1) is nonzero.) ==== + if( ks>=kbot ) then + s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & + h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) + aa = h( kbot-1, kbot-1 ) / s + cc = h( kbot, kbot-1 ) / s + bb = h( kbot-1, kbot ) / s + dd = h( kbot, kbot ) / s + tr2 = ( aa+dd ) / two + det = ( aa-tr2 )*( dd-tr2 ) - bb*cc + rtdisc = sqrt( -det ) + w( kbot-1 ) = ( tr2+rtdisc )*s + w( kbot ) = ( tr2-rtdisc )*s + ks = kbot - 1 + end if + end if + if( kbot-ks+1>ns ) then + ! ==== sort the shifts (helps a little) ==== + sorted = .false. + do k = kbot, ks + 1, -1 + if( sorted )go to 60 + sorted = .true. + do i = ks, k - 1 + if( cabs1( w( i ) )0 ) then + ndfl = 1 + else + ndfl = ndfl + 1 + end if + ! ==== end of main loop ==== + end do loop_70 + ! ==== iteration limit exceeded. set info to show where + ! . the problem occurred and exit. ==== + info = kbot + 80 continue + end if + ! ==== return the optimal value of lwork. ==== + work( 1 ) = cmplx( lwkopt, 0,KIND=dp) + end subroutine stdlib_zlaqr4 + + !> ZLAQZ0: computes the eigenvalues of a real matrix pair (H,T), + !> where H is an upper Hessenberg matrix and T is upper triangular, + !> using the double-shift QZ method. + !> Matrix pairs of this type are produced by the reduction to + !> generalized upper Hessenberg form of a real matrix pair (A,B): + !> A = Q1*H*Z1**H, B = Q1*T*Z1**H, + !> as computed by ZGGHRD. + !> If JOB='S', then the Hessenberg-triangular pair (H,T) is + !> also reduced to generalized Schur form, + !> H = Q*S*Z**H, T = Q*P*Z**H, + !> where Q and Z are unitary matrices, P and S are an upper triangular + !> matrices. + !> Optionally, the unitary matrix Q from the generalized Schur + !> factorization may be postmultiplied into an input matrix Q1, and the + !> unitary matrix Z may be postmultiplied into an input matrix Z1. + !> If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced + !> the matrix pair (A,B) to generalized upper Hessenberg form, then the + !> output matrices Q1*Q and Z1*Z are the unitary factors from the + !> generalized Schur factorization of (A,B): + !> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. + !> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, + !> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is + !> complex and beta real. + !> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the + !> generalized nonsymmetric eigenvalue problem (GNEP) + !> A*x = lambda*B*x + !> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the + !> alternate form of the GNEP + !> mu*A*y = B*y. + !> Eigenvalues can be read directly from the generalized Schur + !> form: + !> alpha = S(i,i), beta = P(i,i). + !> Ref: C.B. Moler + !> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), + !> pp. 241--256. + !> Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ + !> Algorithm with Aggressive Early Deflation", SIAM J. Numer. + !> Anal., 29(2006), pp. 199--227. + !> Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, + !> multipole rational QZ method with agressive early deflation" + + recursive subroutine stdlib_zlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & + beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) + ! arguments + character, intent( in ) :: wants, wantq, wantz + integer(ilp), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec + integer(ilp), intent( out ) :: info + complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & + alpha( * ), beta( * ), work( * ) + real(dp), intent( out ) :: rwork( * ) + + + ! local scalars + real(dp) :: smlnum, ulp, safmin, safmax, c1, tempr + complex(dp) :: eshift, s1, temp + integer(ilp) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& + nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & + istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & + rcost + logical(lk) :: ilschur, ilq, ilz + character :: jbcmpz*3 + if( stdlib_lsame( wants, 'E' ) ) then + ilschur = .false. + iwants = 1 + else if( stdlib_lsame( wants, 'S' ) ) then + ilschur = .true. + iwants = 2 + else + iwants = 0 + end if + if( stdlib_lsame( wantq, 'N' ) ) then + ilq = .false. + iwantq = 1 + else if( stdlib_lsame( wantq, 'V' ) ) then + ilq = .true. + iwantq = 2 + else if( stdlib_lsame( wantq, 'I' ) ) then + ilq = .true. + iwantq = 3 + else + iwantq = 0 + end if + if( stdlib_lsame( wantz, 'N' ) ) then + ilz = .false. + iwantz = 1 + else if( stdlib_lsame( wantz, 'V' ) ) then + ilz = .true. + iwantz = 2 + else if( stdlib_lsame( wantz, 'I' ) ) then + ilz = .true. + iwantz = 3 + else + iwantz = 0 + end if + ! check argument values + info = 0 + if( iwants==0 ) then + info = -1 + else if( iwantq==0 ) then + info = -2 + else if( iwantz==0 ) then + info = -3 + else if( n<0 ) then + info = -4 + else if( ilo<1 ) then + info = -5 + else if( ihi>n .or. ihi= 2 ) then + call stdlib_zhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& + ldq, z, ldz, work, lwork, rwork,info ) + return + end if + ! find out required workspace + ! workspace query to stdlib_zlaqz2 + nw = max( nwr, nmin ) + call stdlib_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & + n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1, rwork, rec,& + aed_info ) + itemp1 = int( work( 1 ),KIND=ilp) + ! workspace query to stdlib_zlaqz3 + call stdlib_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & + ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1, sweep_info ) + itemp2 = int( work( 1 ),KIND=ilp) + lworkreq = max( itemp1+2*nw**2, itemp2+2*nbr**2 ) + if ( lwork ==-1 ) then + work( 1 ) = real( lworkreq,KIND=dp) + return + else if ( lwork < lworkreq ) then + info = -19 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ0', info ) + return + end if + ! initialize q and z + if( iwantq==3 ) call stdlib_zlaset( 'FULL', n, n, czero, cone, q,ldq ) + if( iwantz==3 ) call stdlib_zlaset( 'FULL', n, n, czero, cone, z,ldz ) + ! get machine constants + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp)/ulp ) + istart = ilo + istop = ihi + maxit = 30*( ihi-ilo+1 ) + ld = 0 + do iiter = 1, maxit + if( iiter >= maxit ) then + info = istop+1 + goto 80 + end if + if ( istart+1 >= istop ) then + istop = istart + exit + end if + ! check deflations at the end + if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & + a( istop-1,istop-1 ) ) ) ) ) then + a( istop, istop-1 ) = czero + istop = istop-1 + ld = 0 + eshift = czero + end if + ! check deflations at the start + if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& + abs( a( istart+1,istart+1 ) ) ) ) ) then + a( istart+1, istart ) = czero + istart = istart+1 + ld = 0 + eshift = czero + end if + if ( istart+1 >= istop ) then + exit + end if + ! check interior deflations + istart2 = istart + do k = istop, istart+1, -1 + if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & + ) ) ) ) then + a( k, k-1 ) = czero + istart2 = k + exit + end if + end do + ! get range to apply rotations to + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = istart2 + istopm = istop + end if + ! check infinite eigenvalues, this is done without blocking so might + ! slow down the method when many infinite eigenvalues are present + k = istop + do while ( k>=istart2 ) + tempr = zero + if( k < istop ) then + tempr = tempr+abs( b( k, k+1 ) ) + end if + if( k > istart2 ) then + tempr = tempr+abs( b( k-1, k ) ) + end if + if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then + ! a diagonal element of b is negligable, move it + ! to the top and deflate it + do k2 = k, istart2+1, -1 + call stdlib_zlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) + b( k2-1, k2 ) = temp + b( k2-1, k2-1 ) = czero + call stdlib_zrot( k2-2-istartm+1, b( istartm, k2 ), 1,b( istartm, k2-1 ), & + 1, c1, s1 ) + call stdlib_zrot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1, a( & + istartm, k2-1 ), 1, c1, s1 ) + if ( ilz ) then + call stdlib_zrot( n, z( 1, k2 ), 1, z( 1, k2-1 ), 1, c1,s1 ) + end if + if( k2= istop ) then + istop = istart2-1 + ld = 0 + eshift = czero + cycle + end if + nw = nwr + nshifts = nsr + nblock = nbr + if ( istop-istart2+1 < nmin ) then + ! setting nw to the size of the subblock will make aed deflate + ! all the eigenvalues. this is slightly more efficient than just + ! using qz_small because the off diagonal part gets updated via blas. + if ( istop-istart+1 < nmin ) then + nw = istop-istart+1 + istart2 = istart + else + nw = istop-istart2+1 + end if + end if + ! time for aed + call stdlib_zlaqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& + z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2+1 ), nw,work( & + 2*nw**2+1 ), lwork-2*nw**2, rwork, rec,aed_info ) + if ( n_deflated > 0 ) then + istop = istop-n_deflated + ld = 0 + eshift = czero + end if + if ( 100*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & + ) then + ! aed has uncovered many eigenvalues. skip a qz sweep and run + ! aed again. + cycle + end if + ld = ld+1 + ns = min( nshifts, istop-istart2 ) + ns = min( ns, n_undeflated ) + shiftpos = istop-n_deflated-n_undeflated+1 + if ( mod( ld, 6 ) == 0 ) then + ! exceptional shift. chosen for no particularly good reason. + if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) ) ZLAQZ2: performs AED + + recursive subroutine stdlib_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & + ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) + ! arguments + logical(lk), intent( in ) :: ilschur, ilq, ilz + integer(ilp), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & + rec + complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & + alpha( * ), beta( * ) + integer(ilp), intent( out ) :: ns, nd, info + complex(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) + complex(dp), intent(out) :: work(*) + real(dp), intent(out) :: rwork(*) + + + ! local scalars + integer(ilp) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, & + lworkreq, qz_small_info + real(dp) ::smlnum, ulp, safmin, safmax, c1, tempr + complex(dp) :: s, s1, temp + info = 0 + ! set up deflation window + jw = min( nw, ihi-ilo+1 ) + kwtop = ihi-jw+1 + if ( kwtop == ilo ) then + s = czero + else + s = a( kwtop, kwtop-1 ) + end if + ! determine required workspace + ifst = 1 + ilst = jw + call stdlib_zlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1, rwork, rec+1, qz_small_info ) + lworkreq = int( work( 1 ),KIND=ilp)+2*jw**2 + lworkreq = max( lworkreq, n*nw, 2*nw**2+n ) + if ( lwork ==-1 ) then + ! workspace query, quick return + work( 1 ) = lworkreq + return + else if ( lwork < lworkreq ) then + info = -26 + end if + if( info/=0 ) then + call stdlib_xerbla( 'ZLAQZ2', -info ) + return + end if + ! get machine constants + safmin = stdlib_dlamch( 'SAFE MINIMUM' ) + safmax = one/safmin + call stdlib_dlabad( safmin, safmax ) + ulp = stdlib_dlamch( 'PRECISION' ) + smlnum = safmin*( real( n,KIND=dp)/ulp ) + if ( ihi == kwtop ) then + ! 1 by 1 deflation window, just try a regular deflation + alpha( kwtop ) = a( kwtop, kwtop ) + beta( kwtop ) = b( kwtop, kwtop ) + ns = 1 + nd = 0 + if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then + ns = 0 + nd = 1 + if ( kwtop > ilo ) then + a( kwtop, kwtop-1 ) = czero + end if + end if + end if + ! store window in case of convergence failure + call stdlib_zlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) + call stdlib_zlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2+1 ), jw ) + + ! transform window to real schur form + call stdlib_zlaset( 'FULL', jw, jw, czero, cone, qc, ldqc ) + call stdlib_zlaset( 'FULL', jw, jw, czero, cone, zc, ldzc ) + call stdlib_zlaqz0( 'S', 'V', 'V', jw, 1, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& + ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2*jw**2+1 ), lwork-2*jw**2, rwork,rec+1, & + qz_small_info ) + if( qz_small_info /= 0 ) then + ! convergence failure, restore the window and exit + nd = 0 + ns = jw-qz_small_info + call stdlib_zlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) + call stdlib_zlacpy( 'ALL', jw, jw, work( jw**2+1 ), jw, b( kwtop,kwtop ), ldb ) + + return + end if + ! deflation detection loop + if ( kwtop == ilo .or. s == czero ) then + kwbot = kwtop-1 + else + kwbot = ihi + k = 1 + k2 = 1 + do while ( k <= jw ) + ! try to deflate eigenvalue + tempr = abs( a( kwbot, kwbot ) ) + if( tempr == zero ) then + tempr = abs( s ) + end if + if ( ( abs( s*qc( 1, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & + then + ! deflatable + kwbot = kwbot-1 + else + ! not deflatable, move out of the way + ifst = kwbot-kwtop+1 + ilst = k2 + call stdlib_ztgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & + kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info ) + k2 = k2+1 + end if + k = k+1 + end do + end if + ! store eigenvalues + nd = ihi-kwbot + ns = jw-nd + k = kwtop + do while ( k <= ihi ) + alpha( k ) = a( k, k ) + beta( k ) = b( k, k ) + k = k+1 + end do + if ( kwtop /= ilo .and. s /= czero ) then + ! reflect spike back, this will create optimally packed bulges + a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1,1:jw-nd ) ) + do k = kwbot-1, kwtop, -1 + call stdlib_zlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) + a( k, kwtop-1 ) = temp + a( k+1, kwtop-1 ) = czero + k2 = max( kwtop, k-1 ) + call stdlib_zrot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) + call stdlib_zrot( ihi-( k-1 )+1, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) + + call stdlib_zrot( jw, qc( 1, k-kwtop+1 ), 1, qc( 1, k+1-kwtop+1 ),1, c1, conjg( & + s1 ) ) + end do + ! chase bulges down + istartm = kwtop + istopm = ihi + k = kwbot-1 + do while ( k >= kwtop ) + ! move bulge down and remove it + do k2 = k, kwbot-1 + call stdlib_zlaqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & + ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) + end do + k = k-1 + end do + end if + ! apply qc and zc to rest of the matrix + if ( ilschur ) then + istartm = 1 + istopm = n + else + istartm = ilo + istopm = ihi + end if + if ( istopm-ihi > 0 ) then + call stdlib_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & + lda, czero, work, jw ) + call stdlib_zlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) + call stdlib_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & + ldb, czero, work, jw ) + call stdlib_zlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) + end if + if ( ilq ) then + call stdlib_zgemm( 'N', 'N', n, jw, jw, cone, q( 1, kwtop ), ldq, qc,ldqc, czero, & + work, n ) + call stdlib_zlacpy( 'ALL', n, jw, work, n, q( 1, kwtop ), ldq ) + end if + if ( kwtop-1-istartm+1 > 0 ) then + call stdlib_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& + , lda ) + call stdlib_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & + zc, ldzc, czero, work,kwtop-istartm ) + call stdlib_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& + , ldb ) + end if + if ( ilz ) then + call stdlib_zgemm( 'N', 'N', n, jw, jw, cone, z( 1, kwtop ), ldz, zc,ldzc, czero, & + work, n ) + call stdlib_zlacpy( 'ALL', n, jw, work, n, z( 1, kwtop ), ldz ) + end if + end subroutine stdlib_zlaqz2 + + !> DLATRF_AA factorizes a panel of a complex symmetric matrix A using + !> the Aasen's algorithm. The panel consists of a set of NB rows of A + !> when UPLO is U, or a set of NB columns when UPLO is L. + !> In order to factorize the panel, the Aasen's algorithm requires the + !> last row, or column, of the previous panel. The first row, or column, + !> of A is set to be the first row, or column, of an identity matrix, + !> which is used to factorize the first panel. + !> The resulting J-th row of U, or J-th column of L, is stored in the + !> (J-1)-th row, or column, of A (without the unit diagonals), while + !> the diagonal and subdiagonal of A are overwritten by those of T. + + pure subroutine stdlib_zlasyf_aa( uplo, j1, m, nb, a, lda, ipiv,h, ldh, work ) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: m, nb, j1, lda, ldh + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), h(ldh,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + integer(ilp) :: j, k, k1, i1, i2, mj + complex(dp) :: piv, alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + j = 1 + ! k1 is the first column of the panel to be factorized + ! i.e., k1 is 2 for the first block column, and 1 for the rest of the blocks + k1 = (2-j1)+1 + if( stdlib_lsame( uplo, 'U' ) ) then + ! ..................................................... + ! factorize a as u**t*d*u using the upper triangle of a + ! ..................................................... + 10 continue + if ( j>min(m, nb) )go to 20 + ! k is the column to be factorized + ! when being called from stdlib_zsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j, j:m) - h(j:m, 1:(j-1)) * l(j1:(j-1), j), + ! where h(j:m, j) has been initialized to be a(j, j:m) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_zgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( 1, j ), 1,& + cone, h( j, j ), 1 ) + end if + ! copy h(i:m, i) into work + call stdlib_zcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j-1, j:m) * t(j-1,j), + ! where a(j-1, j) stores t(j-1, j) and a(j-2, j:m) stores u(j-1, j:m) + alpha = -a( k-1, j ) + call stdlib_zaxpy( mj, alpha, a( k-2, j ), lda, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( k, j ) = work( 1 ) + if( j1 ) then + alpha = -a( k, j ) + call stdlib_zaxpy( m-j, alpha, a( k-1, j+1 ), lda,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_izamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1, i1+1:m) with a(i1+1:m, i2) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_zswap( i2-i1-1, a( j1+i1-1, i1+1 ), lda,a( j1+i1, i2 ), 1 ) + + ! swap a(i1, i2+1:m) with a(i2, i2+1:m) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_zswap( i1-k1+1, a( 1, i1 ), 1,a( 1, i2 ), 1 ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j, j+1) = t(j, j+1) + a( k, j+1 ) = work( 2 ) + if( jmin( m, nb ) )go to 40 + ! k is the column to be factorized + ! when being called from stdlib_zsytrf_aa, + ! > for the first block column, j1 is 1, hence j1+j-1 is j, + ! > for the rest of the columns, j1 is 2, and j1+j-1 is j+1, + k = j1+j-1 + if( j==m ) then + ! only need to compute t(j, j) + mj = 1 + else + mj = m-j+1 + end if + ! h(j:m, j) := a(j:m, j) - h(j:m, 1:(j-1)) * l(j, j1:(j-1))^t, + ! where h(j:m, j) has been initialized to be a(j:m, j) + if( k>2 ) then + ! k is the column to be factorized + ! > for the first block column, k is j, skipping the first two + ! columns + ! > for the rest of the columns, k is j+1, skipping only the + ! first column + call stdlib_zgemv( 'NO TRANSPOSE', mj, j-k1,-cone, h( j, k1 ), ldh,a( j, 1 ), & + lda,cone, h( j, j ), 1 ) + end if + ! copy h(j:m, j) into work + call stdlib_zcopy( mj, h( j, j ), 1, work( 1 ), 1 ) + if( j>k1 ) then + ! compute work := work - l(j:m, j-1) * t(j-1,j), + ! where a(j-1, j) = t(j-1, j) and a(j, j-2) = l(j, j-1) + alpha = -a( j, k-1 ) + call stdlib_zaxpy( mj, alpha, a( j, k-2 ), 1, work( 1 ), 1 ) + end if + ! set a(j, j) = t(j, j) + a( j, k ) = work( 1 ) + if( j1 ) then + alpha = -a( j, k ) + call stdlib_zaxpy( m-j, alpha, a( j+1, k-1 ), 1,work( 2 ), 1 ) + endif + ! find max(|work(2:m)|) + i2 = stdlib_izamax( m-j, work( 2 ), 1 ) + 1 + piv = work( i2 ) + ! apply symmetric pivot + if( (i2/=2) .and. (piv/=0) ) then + ! swap work(i1) and work(i2) + i1 = 2 + work( i2 ) = work( i1 ) + work( i1 ) = piv + ! swap a(i1+1:m, i1) with a(i2, i1+1:m) + i1 = i1+j-1 + i2 = i2+j-1 + call stdlib_zswap( i2-i1-1, a( i1+1, j1+i1-1 ), 1,a( i2, j1+i1 ), lda ) + + ! swap a(i2+1:m, i1) with a(i2+1:m, i2) + if( i2(k1-1) ) then + ! swap l(1:i1-1, i1) with l(1:i1-1, i2), + ! skipping the first column + call stdlib_zswap( i1-k1+1, a( i1, 1 ), lda,a( i2, 1 ), lda ) + end if + else + ipiv( j+1 ) = j+1 + endif + ! set a(j+1, j) = t(j+1, j) + a( j+1, k ) = work( 2 ) + if( j ZSYSV computes the solution to a complex system of linear equations + !> A * X = B, + !> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS + !> matrices. + !> Aasen's algorithm is used to factor A as + !> A = U**T * T * U, if UPLO = 'U', or + !> A = L * T * L**T, if UPLO = 'L', + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is symmetric tridiagonal. The factored + !> form of A is then used to solve the system of equations A * X = B. + + pure subroutine stdlib_zsysv_aa( uplo, n, nrhs, a, lda, ipiv, b, ldb, work,lwork, info ) + + ! -- lapack driver routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(out) :: info + integer(ilp), intent(in) :: lda, ldb, lwork, n, nrhs + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*), b(ldb,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + ! Local Scalars + logical(lk) :: lquery + integer(ilp) :: lwkopt, lwkopt_sytrf, lwkopt_sytrs + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! test the input parameters. + info = 0 + lquery = ( lwork==-1 ) + if( .not.stdlib_lsame( uplo, 'U' ) .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( nrhs<0 ) then + info = -3 + else if( lda ZSYTRF_AA: computes the factorization of a complex symmetric matrix A + !> using the Aasen's algorithm. The form of the factorization is + !> A = U**T*T*U or A = L*T*L**T + !> where U (or L) is a product of permutation and unit upper (lower) + !> triangular matrices, and T is a complex symmetric tridiagonal matrix. + !> This is the blocked version of the algorithm, calling Level 3 BLAS. + + pure subroutine stdlib_zsytrf_aa( uplo, n, a, lda, ipiv, work, lwork, info) + ! -- lapack computational routine -- + ! -- lapack is a software package provided by univ. of tennessee, -- + ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- + ! Scalar Arguments + character, intent(in) :: uplo + integer(ilp), intent(in) :: n, lda, lwork + integer(ilp), intent(out) :: info + ! Array Arguments + integer(ilp), intent(out) :: ipiv(*) + complex(dp), intent(inout) :: a(lda,*) + complex(dp), intent(out) :: work(*) + ! ===================================================================== + + ! Local Scalars + logical(lk) :: lquery, upper + integer(ilp) :: j, lwkopt + integer(ilp) :: nb, mj, nj, k1, k2, j1, j2, j3, jb + complex(dp) :: alpha + ! Intrinsic Functions + intrinsic :: max + ! Executable Statements + ! determine the block size + nb = stdlib_ilaenv( 1, 'ZSYTRF_AA', uplo, n, -1, -1, -1 ) + ! test the input parameters. + info = 0 + upper = stdlib_lsame( uplo, 'U' ) + lquery = ( lwork==-1 ) + if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then + info = -1 + else if( n<0 ) then + info = -2 + else if( lda=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j + 1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_zlasyf_aa( uplo, 2-k1, n-j, jb,a( max(1, j), j+1 ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_zswap( j1-k1-2, a( 1, j2 ), 1,a( 1, ipiv(j2) ), 1 ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! the row a(j1-1, j2-1:n) stores u(j1, j2+1:n) and + ! work stores the current block of the auxiriarly matrix h + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j, j+1 ) + a( j, j+1 ) = cone + call stdlib_zcopy( n-j, a( j-1, j+1 ), lda,work( (j+1-j1+1)+jb*n ), 1 ) + + call stdlib_zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_zgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_zgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j1-k2, j3 ), 1,cone, a( j3, j3 ), lda ) + j3 = j3 + 1 + end do + ! update off-diagonal block of j2-th block row with stdlib_zgemm + call stdlib_zgemm( 'TRANSPOSE', 'TRANSPOSE',nj, n-j3+1, jb+1,-cone, a( j1-& + k2, j2 ), lda,work( j3-j1+1+k1*n ), n,cone, a( j2, j3 ), lda ) + end do + ! recover t( j, j+1 ) + a( j, j+1 ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 ) + end if + go to 10 + else + ! ..................................................... + ! factorize a as l*d*l**t using the lower triangle of a + ! ..................................................... + ! copy first column a(1:n, 1) into h(1:n, 1) + ! (stored in work(1:n)) + call stdlib_zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 ) + ! j is the main loop index, increasing from 1 to n in steps of + ! jb, where jb is the number of columns factorized by stdlib_zlasyf; + ! jb is either nb, or n-j+1 for the last block + j = 0 + 11 continue + if( j>=n )go to 20 + ! each step of the main loop + ! j is the last column of the previous panel + ! j1 is the first column of the current panel + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 for the first panel, and + ! k1=0 for the rest + j1 = j+1 + jb = min( n-j1+1, nb ) + k1 = max(1, j)-j + ! panel factorization + call stdlib_zlasyf_aa( uplo, 2-k1, n-j, jb,a( j+1, max(1, j) ), lda,ipiv( j+1 ), & + work, n, work( n*nb+1 ) ) + ! adjust ipiv and apply it back (j-th step picks (j+1)-th pivot) + do j2 = j+2, min(n, j+jb+1) + ipiv( j2 ) = ipiv( j2 ) + j + if( (j2/=ipiv(j2)) .and. ((j1-k1)>2) ) then + call stdlib_zswap( j1-k1-2, a( j2, 1 ), lda,a( ipiv(j2), 1 ), lda ) + end if + end do + j = j + jb + ! trailing submatrix update, where + ! a(j2+1, j1-1) stores l(j2+1, j1) and + ! work(j2+1, 1) stores h(j2+1, 1) + if( j1 .or. jb>1 ) then + ! merge rank-1 update with blas-3 update + alpha = a( j+1, j ) + a( j+1, j ) = cone + call stdlib_zcopy( n-j, a( j+1, j-1 ), 1,work( (j+1-j1+1)+jb*n ), 1 ) + call stdlib_zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 ) + ! k1 identifies if the previous column of the panel has been + ! explicitly stored, e.g., k1=1 and k2= 0 for the first panel, + ! while k1=0 and k2=1 for the rest + if( j1>1 ) then + ! not first panel + k2 = 1 + else + ! first panel + k2 = 0 + ! first update skips the first column + jb = jb - 1 + end if + do j2 = j+1, n, nb + nj = min( nb, n-j2+1 ) + ! update (j2, j2) diagonal block with stdlib_zgemv + j3 = j2 + do mj = nj-1, 1, -1 + call stdlib_zgemv( 'NO TRANSPOSE', mj, jb+1,-cone, work( j3-j1+1+k1*n ),& + n,a( j3, j1-k2 ), lda,cone, a( j3, j3 ), 1 ) + j3 = j3 + 1 + end do + ! update off-diagonal block in j2-th block column with stdlib_zgemm + call stdlib_zgemm( 'NO TRANSPOSE', 'TRANSPOSE',n-j3+1, nj, jb+1,-cone, & + work( j3-j1+1+k1*n ), n,a( j2, j1-k2 ), lda,cone, a( j3, j2 ), lda ) + + end do + ! recover t( j+1, j ) + a( j+1, j ) = alpha + end if + ! work(j+1, 1) stores h(j+1, 1) + call stdlib_zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 ) + end if + go to 11 + end if + 20 continue + work( 1 ) = lwkopt + return + end subroutine stdlib_zsytrf_aa + + + +end module stdlib_linalg_lapack_z diff --git a/test/linalg/CMakeLists.txt b/test/linalg/CMakeLists.txt index 4a315f545..3d590a9d2 100644 --- a/test/linalg/CMakeLists.txt +++ b/test/linalg/CMakeLists.txt @@ -1,9 +1,11 @@ set( fppFiles "test_linalg.fypp" + "test_blas_lapack.fypp" "test_linalg_matrix_property_checks.fypp" ) fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) ADDTEST(linalg) ADDTEST(linalg_matrix_property_checks) +ADDTEST(blas_lapack) diff --git a/test/linalg/test_blas_lapack.fypp b/test/linalg/test_blas_lapack.fypp new file mode 100644 index 000000000..e36ac2717 --- /dev/null +++ b/test/linalg/test_blas_lapack.fypp @@ -0,0 +1,147 @@ +#:include "common.fypp" +#:set RC_KINDS_TYPES = REAL_KINDS_TYPES + CMPLX_KINDS_TYPES + +module test_blas_lapack + use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64 + use stdlib_linalg, only: eye + use stdlib_linalg_blas + use stdlib_linalg_lapack + + implicit none + + real(sp), parameter :: sptol = 1000 * epsilon(1._sp) + real(dp), parameter :: dptol = 1000 * epsilon(1._dp) +#:if WITH_QP + real(qp), parameter :: qptol = 1000 * epsilon(1._qp) +#:endif + + + +contains + + !> Collect all exported unit tests + subroutine collect_blas_lapack(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + #:for k1, t1 in REAL_KINDS_TYPES + new_unittest("test_gemv${t1[0]}$${k1}$", test_gemv${t1[0]}$${k1}$), & + new_unittest("test_getri${t1[0]}$${k1}$", test_getri${t1[0]}$${k1}$), & + #:endfor + new_unittest("test_idamax", test_idamax) & + ] + + end subroutine collect_blas_lapack + + + #:for k1, t1 in REAL_KINDS_TYPES + subroutine test_gemv${t1[0]}$${k1}$(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + +#:if k1=="xdp" + call skip_test(error, "Extended precision is not enabled") +#:else + ${t1}$ :: A(3,3),x(3),y(3),ylap(3),yintr(3),alpha,beta + call random_number(alpha) + call random_number(beta) + call random_number(A) + call random_number(x) + call random_number(y) + ylap = y + call gemv('No transpose',size(A,1),size(A,2),alpha,A,size(A,1),x,1,beta,ylap,1) + yintr = alpha*matmul(A,x)+beta*y + + call check(error, sum(abs(ylap - yintr)) < sptol, & + "blas vs. intrinsics axpy: sum() < sptol failed") + if (allocated(error)) return +#:endif + end subroutine test_gemv${t1[0]}$${k1}$ + + ! Find matrix inverse from LU decomposition + subroutine test_getri${t1[0]}$${k1}$(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + +#:if k1=="xdp" + call skip_test(error, "Extended precision is not enabled") +#:else + + integer(ilp), parameter :: n = 3 + ${t1}$ :: A(n,n) + ${t1}$,allocatable :: work(:) + integer(ilp) :: ipiv(n),info,lwork,nb + + + A = eye(n) + + ! Factorize matrix (overwrite result) + call getrf(size(A,1),size(A,2),A,size(A,1),ipiv,info) + call check(error, info==0, "lapack getrf returned info/=0") + if (allocated(error)) return + + ! Get optimal worksize (returned in work(1)) (apply 2% safety parameter) + nb = stdlib_ilaenv(1,'${t1[0]}$getri',' ',n,-1,-1,-1) + lwork = nint(1.02*n*nb,kind=ilp) + allocate (work(lwork)) + + ! Invert matrix + call getri(n,a,n,ipiv,work,lwork,info) + + call check(error, info==0, "lapack getri returned info/=0") + if (allocated(error)) return + + call check(error, sum(abs(A - eye(3))) < sptol, & + "lapack eye inversion: tolerance check failed") + if (allocated(error)) return +#:endif + end subroutine test_getri${t1[0]}$${k1}$ + #:endfor + + ! Return + subroutine test_idamax(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer(ilp), parameter :: n = 5 + integer(ilp) :: imax + real(dp) :: x(n) + + x = [1,2,3,4,5] + + imax = stdlib_idamax(n,x,1) + + call check(error, imax==5, "blas idamax returned wrong location") + + end subroutine test_idamax + +end module test_blas_lapack + + +program tester + use, intrinsic :: iso_fortran_env, only : error_unit + use testdrive, only : run_testsuite, new_testsuite, testsuite_type + use test_blas_lapack, only : collect_blas_lapack + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + testsuites = [ & + new_testsuite("blas_lapack", collect_blas_lapack) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program + diff --git a/test/stats/array3.dat b/test/stats/array3.dat deleted file mode 100644 index 13b583f89..000000000 --- a/test/stats/array3.dat +++ /dev/null @@ -1,16 +0,0 @@ -1.000000000000000021e-08 9.199998759392489944e+01 -1.024113254885563425e-08 9.199998731474968849e+01 -1.048233721895820948e-08 9.199998703587728244e+01 -1.072361403187881949e-08 9.199998675729767683e+01 -1.096496300919481796e-08 9.199998647900135040e+01 -1.120638417249036630e-08 9.199998620097916557e+01 -1.144787754335570897e-08 9.199998592322251056e+01 -1.168944314338753750e-08 9.199998564572304360e+01 -1.193108099418952317e-08 9.199998536847290609e+01 -1.217279111737088596e-08 9.199998509146449521e+01 -1.241457353454836993e-08 9.199998481469057765e+01 -1.265642826734443823e-08 9.199998453814424693e+01 -1.289835533738818635e-08 9.199998426181879552e+01 -1.314035476631514857e-08 9.199998398570787117e+01 -1.338242657576766519e-08 9.199998370980536322e+01 -1.362457078739434161e-08 9.199998343410533153e+01